zlaqr2.f (flens/lapack/interface/ref

  1       SUBROUTINE ZLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,

  2      $                   IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,

  3      $                   NV, WV, LDWV, WORK, LWORK )

  4 *

  5 *  -- LAPACK auxiliary routine (version 3.2.1)                        --

  6 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..

  7 *  -- April 2009                                                      --

  8 *

  9 *     .. Scalar Arguments ..

 10       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,

 11      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW

 12       LOGICAL            WANTT, WANTZ

 13 *     ..

 14 *     .. Array Arguments ..

 15       COMPLEX*16         H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),

 16      $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )

 17 *     ..

 18 *

 19 *     This subroutine is identical to ZLAQR3 except that it avoids

 20 *     recursion by calling ZLAHQR instead of ZLAQR4.

 21 *

 22 *

 23 *     ******************************************************************

 24 *     Aggressive early deflation:

 25 *

 26 *     This subroutine accepts as input an upper Hessenberg matrix

 27 *     H and performs an unitary similarity transformation

 28 *     designed to detect and deflate fully converged eigenvalues from

 29 *     a trailing principal submatrix.  On output H has been over-

 30 *     written by a new Hessenberg matrix that is a perturbation of

 31 *     an unitary similarity transformation of H.  It is to be

 32 *     hoped that the final version of H has many zero subdiagonal

 33 *     entries.

 34 *

 35 *     ******************************************************************

 36 *     WANTT   (input) LOGICAL

 37 *          If .TRUE., then the Hessenberg matrix H is fully updated

 38 *          so that the triangular Schur factor may be

 39 *          computed (in cooperation with the calling subroutine).

 40 *          If .FALSE., then only enough of H is updated to preserve

 41 *          the eigenvalues.

 42 *

 43 *     WANTZ   (input) LOGICAL

 44 *          If .TRUE., then the unitary matrix Z is updated so

 45 *          so that the unitary Schur factor may be computed

 46 *          (in cooperation with the calling subroutine).

 47 *          If .FALSE., then Z is not referenced.

 48 *

 49 *     N       (input) INTEGER

 50 *          The order of the matrix H and (if WANTZ is .TRUE.) the

 51 *          order of the unitary matrix Z.

 52 *

 53 *     KTOP    (input) INTEGER

 54 *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.

 55 *          KBOT and KTOP together determine an isolated block

 56 *          along the diagonal of the Hessenberg matrix.

 57 *

 58 *     KBOT    (input) INTEGER

 59 *          It is assumed without a check that either

 60 *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together

 61 *          determine an isolated block along the diagonal of the

 62 *          Hessenberg matrix.

 63 *

 64 *     NW      (input) INTEGER

 65 *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).

 66 *

 67 *     H       (input/output) COMPLEX*16 array, dimension (LDH,N)

 68 *          On input the initial N-by-N section of H stores the

 69 *          Hessenberg matrix undergoing aggressive early deflation.

 70 *          On output H has been transformed by a unitary

 71 *          similarity transformation, perturbed, and the returned

 72 *          to Hessenberg form that (it is to be hoped) has some

 73 *          zero subdiagonal entries.

 74 *

 75 *     LDH     (input) integer

 76 *          Leading dimension of H just as declared in the calling

 77 *          subroutine.  N .LE. LDH

 78 *

 79 *     ILOZ    (input) INTEGER

 80 *     IHIZ    (input) INTEGER

 81 *          Specify the rows of Z to which transformations must be

 82 *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.

 83 *

 84 *     Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)

 85 *          IF WANTZ is .TRUE., then on output, the unitary

 86 *          similarity transformation mentioned above has been

 87 *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.

 88 *          If WANTZ is .FALSE., then Z is unreferenced.

 89 *

 90 *     LDZ     (input) integer

 91 *          The leading dimension of Z just as declared in the

 92 *          calling subroutine.  1 .LE. LDZ.

 93 *

 94 *     NS      (output) integer

 95 *          The number of unconverged (ie approximate) eigenvalues

 96 *          returned in SR and SI that may be used as shifts by the

 97 *          calling subroutine.

 98 *

 99 *     ND      (output) integer

100 *          The number of converged eigenvalues uncovered by this

101 *          subroutine.

102 *

103 *     SH      (output) COMPLEX*16 array, dimension KBOT

104 *          On output, approximate eigenvalues that may

105 *          be used for shifts are stored in SH(KBOT-ND-NS+1)

106 *          through SR(KBOT-ND).  Converged eigenvalues are

107 *          stored in SH(KBOT-ND+1) through SH(KBOT).

108 *

109 *     V       (workspace) COMPLEX*16 array, dimension (LDV,NW)

110 *          An NW-by-NW work array.

111 *

112 *     LDV     (input) integer scalar

113 *          The leading dimension of V just as declared in the

114 *          calling subroutine.  NW .LE. LDV

115 *

116 *     NH      (input) integer scalar

117 *          The number of columns of T.  NH.GE.NW.

118 *

119 *     T       (workspace) COMPLEX*16 array, dimension (LDT,NW)

120 *

121 *     LDT     (input) integer

122 *          The leading dimension of T just as declared in the

123 *          calling subroutine.  NW .LE. LDT

124 *

125 *     NV      (input) integer

126 *          The number of rows of work array WV available for

127 *          workspace.  NV.GE.NW.

128 *

129 *     WV      (workspace) COMPLEX*16 array, dimension (LDWV,NW)

130 *

131 *     LDWV    (input) integer

132 *          The leading dimension of W just as declared in the

133 *          calling subroutine.  NW .LE. LDV

134 *

135 *     WORK    (workspace) COMPLEX*16 array, dimension LWORK.

136 *          On exit, WORK(1) is set to an estimate of the optimal value

137 *          of LWORK for the given values of N, NW, KTOP and KBOT.

138 *

139 *     LWORK   (input) integer

140 *          The dimension of the work array WORK.  LWORK = 2*NW

141 *          suffices, but greater efficiency may result from larger

142 *          values of LWORK.

143 *

144 *          If LWORK = -1, then a workspace query is assumed; ZLAQR2

145 *          only estimates the optimal workspace size for the given

146 *          values of N, NW, KTOP and KBOT.  The estimate is returned

147 *          in WORK(1).  No error message related to LWORK is issued

148 *          by XERBLA.  Neither H nor Z are accessed.

149 *

150 *     ================================================================

151 *     Based on contributions by

152 *        Karen Braman and Ralph Byers, Department of Mathematics,

153 *        University of Kansas, USA

154 *

155 *     ================================================================

156 *     .. Parameters ..

157       COMPLEX*16         ZERO, ONE

158       PARAMETER          ( ZERO = ( 0.0d0, 0.0d0 ),

159      $                   ONE = ( 1.0d0, 0.0d0 ) )

160       DOUBLE PRECISION   RZERO, RONE

161       PARAMETER          ( RZERO = 0.0d0, RONE = 1.0d0 )

162 *     ..

163 *     .. Local Scalars ..

164       COMPLEX*16         BETA, CDUM, S, TAU

165       DOUBLE PRECISION   FOO, SAFMAX, SAFMIN, SMLNUM, ULP

166       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,

167      $                   KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWKOPT

168 *     ..

169 *     .. External Functions ..

170       DOUBLE PRECISION   DLAMCH

171       EXTERNAL           DLAMCH

172 *     ..

173 *     .. External Subroutines ..

174       EXTERNAL           DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR,

175      $                   ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR

176 *     ..

177 *     .. Intrinsic Functions ..

178       INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN

179 *     ..

180 *     .. Statement Functions ..

181       DOUBLE PRECISION   CABS1

182 *     ..

183 *     .. Statement Function definitions ..

184       CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )

185 *     ..

186 *     .. Executable Statements ..

187 *

188 *     ==== Estimate optimal workspace. ====

189 *

190       JW = MIN( NW, KBOT-KTOP+1 )

191       IF( JW.LE.2 ) THEN

192          LWKOPT = 1

193       ELSE

194 *

195 *        ==== Workspace query call to ZGEHRD ====

196 *

197          CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )

198          LWK1 = INT( WORK( 1 ) )

199 *

200 *        ==== Workspace query call to ZUNMHR ====

201 *

202          CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,

203      $                WORK, -1, INFO )

204          LWK2 = INT( WORK( 1 ) )

205 *

206 *        ==== Optimal workspace ====

207 *

208          LWKOPT = JW + MAX( LWK1, LWK2 )

209       END IF

210 *

211 *     ==== Quick return in case of workspace query. ====

212 *

213       IF( LWORK.EQ.-1 ) THEN

214          WORK( 1 ) = DCMPLX( LWKOPT, 0 )

215          RETURN

216       END IF

217 *

218 *     ==== Nothing to do ...

219 *     ... for an empty active block ... ====

220       NS = 0

221       ND = 0

222       WORK( 1 ) = ONE

223       IF( KTOP.GT.KBOT )

224      $   RETURN

225 *     ... nor for an empty deflation window. ====

226       IF( NW.LT.1 )

227      $   RETURN

228 *

229 *     ==== Machine constants ====

230 *

231       SAFMIN = DLAMCH( 'SAFE MINIMUM' )

232       SAFMAX = RONE / SAFMIN

233       CALL DLABAD( SAFMIN, SAFMAX )

234       ULP = DLAMCH( 'PRECISION' )

235       SMLNUM = SAFMIN*( DBLE( N ) / ULP )

236 *

237 *     ==== Setup deflation window ====

238 *

239       JW = MIN( NW, KBOT-KTOP+1 )

240       KWTOP = KBOT - JW + 1

241       IF( KWTOP.EQ.KTOP ) THEN

242          S = ZERO

243       ELSE

244          S = H( KWTOP, KWTOP-1 )

245       END IF

246 *

247       IF( KBOT.EQ.KWTOP ) THEN

248 *

249 *        ==== 1-by-1 deflation window: not much to do ====

250 *

251          SH( KWTOP ) = H( KWTOP, KWTOP )

252          NS = 1

253          ND = 0

254          IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,

255      $       KWTOP ) ) ) ) THEN

256             NS = 0

257             ND = 1

258             IF( KWTOP.GT.KTOP )

259      $         H( KWTOP, KWTOP-1 ) = ZERO

260          END IF

261          WORK( 1 ) = ONE

262          RETURN

263       END IF

264 *

265 *     ==== Convert to spike-triangular form.  (In case of a

266 *     .    rare QR failure, this routine continues to do

267 *     .    aggressive early deflation using that part of

268 *     .    the deflation window that converged using INFQR

269 *     .    here and there to keep track.) ====

270 *

271       CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )

272       CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )

273 *

274       CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV )

275       CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,

276      $             JW, V, LDV, INFQR )

277 *

278 *     ==== Deflation detection loop ====

279 *

280       NS = JW

281       ILST = INFQR + 1

282       DO 10 KNT = INFQR + 1, JW

283 *

284 *        ==== Small spike tip deflation test ====

285 *

286          FOO = CABS1( T( NS, NS ) )

287          IF( FOO.EQ.RZERO )

288      $      FOO = CABS1( S )

289          IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )

290      $        THEN

291 *

292 *           ==== One more converged eigenvalue ====

293 *

294             NS = NS - 1

295          ELSE

296 *

297 *           ==== One undeflatable eigenvalue.  Move it up out of the

298 *           .    way.   (ZTREXC can not fail in this case.) ====

299 *

300             IFST = NS

301             CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )

302             ILST = ILST + 1

303          END IF

304    10 CONTINUE

305 *

306 *        ==== Return to Hessenberg form ====

307 *

308       IF( NS.EQ.0 )

309      $   S = ZERO

310 *

311       IF( NS.LT.JW ) THEN

312 *

313 *        ==== sorting the diagonal of T improves accuracy for

314 *        .    graded matrices.  ====

315 *

316          DO 30 I = INFQR + 1, NS

317             IFST = I

318             DO 20 J = I + 1, NS

319                IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )

320      $            IFST = J

321    20       CONTINUE

322             ILST = I

323             IF( IFST.NE.ILST )

324      $         CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )

325    30    CONTINUE

326       END IF

327 *

328 *     ==== Restore shift/eigenvalue array from T ====

329 *

330       DO 40 I = INFQR + 1, JW

331          SH( KWTOP+I-1 ) = T( I, I )

332    40 CONTINUE

333 *

334 *

335       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN

336          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN

337 *

338 *           ==== Reflect spike back into lower triangle ====

339 *

340             CALL ZCOPY( NS, V, LDV, WORK, 1 )

341             DO 50 I = 1, NS

342                WORK( I ) = DCONJG( WORK( I ) )

343    50       CONTINUE

344             BETA = WORK( 1 )

345             CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU )

346             WORK( 1 ) = ONE

347 *

348             CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )

349 *

350             CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT,

351      $                  WORK( JW+1 ) )

352             CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,

353      $                  WORK( JW+1 ) )

354             CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,

355      $                  WORK( JW+1 ) )

356 *

357             CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),

358      $                   LWORK-JW, INFO )

359          END IF

360 *

361 *        ==== Copy updated reduced window into place ====

362 *

363          IF( KWTOP.GT.1 )

364      $      H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 1, 1 ) )

365          CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )

366          CALL ZCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),

367      $               LDH+1 )

368 *

369 *        ==== Accumulate orthogonal matrix in order update

370 *        .    H and Z, if requested.  ====

371 *

372          IF( NS.GT.1 .AND. S.NE.ZERO )

373      $      CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,

374      $                   WORK( JW+1 ), LWORK-JW, INFO )

375 *

376 *        ==== Update vertical slab in H ====

377 *

378          IF( WANTT ) THEN

379             LTOP = 1

380          ELSE

381             LTOP = KTOP

382          END IF

383          DO 60 KROW = LTOP, KWTOP - 1, NV

384             KLN = MIN( NV, KWTOP-KROW )

385             CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),

386      $                  LDH, V, LDV, ZERO, WV, LDWV )

387             CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )

388    60    CONTINUE

389 *

390 *        ==== Update horizontal slab in H ====

391 *

392          IF( WANTT ) THEN

393             DO 70 KCOL = KBOT + 1, N, NH

394                KLN = MIN( NH, N-KCOL+1 )

395                CALL ZGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,

396      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )

397                CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),

398      $                      LDH )

399    70       CONTINUE

400          END IF

401 *

402 *        ==== Update vertical slab in Z ====

403 *

404          IF( WANTZ ) THEN

405             DO 80 KROW = ILOZ, IHIZ, NV

406                KLN = MIN( NV, IHIZ-KROW+1 )

407                CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),

408      $                     LDZ, V, LDV, ZERO, WV, LDWV )

409                CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),

410      $                      LDZ )

411    80       CONTINUE

412          END IF

413       END IF

414 *

415 *     ==== Return the number of deflations ... ====

416 *

417       ND = JW - NS

418 *

419 *     ==== ... and the number of shifts. (Subtracting

420 *     .    INFQR from the spike length takes care

421 *     .    of the case of a rare QR failure while

422 *     .    calculating eigenvalues of the deflation

423 *     .    window.)  ====

424 *

425       NS = NS - INFQR

426 *

427 *      ==== Return optimal workspace. ====

428 *

429       WORK( 1 ) = DCMPLX( LWKOPT, 0 )

430 *

431 *     ==== End of ZLAQR2 ====

432 *

433       END