zlaqr3.f (flens/lapack/interface/ref

  1       SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,

  2      $                   IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,

  3      $                   NV, WV, LDWV, WORK, LWORK )

  4 *

  5 *  -- LAPACK auxiliary routine (version 3.2.1)                        --

  6 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..

  7 *  -- April 2009                                                      --

  8 *

  9 *     .. Scalar Arguments ..

 10       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,

 11      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW

 12       LOGICAL            WANTT, WANTZ

 13 *     ..

 14 *     .. Array Arguments ..

 15       COMPLEX*16         H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),

 16      $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )

 17 *     ..

 18 *

 19 *     ******************************************************************

 20 *     Aggressive early deflation:

 21 *

 22 *     This subroutine accepts as input an upper Hessenberg matrix

 23 *     H and performs an unitary similarity transformation

 24 *     designed to detect and deflate fully converged eigenvalues from

 25 *     a trailing principal submatrix.  On output H has been over-

 26 *     written by a new Hessenberg matrix that is a perturbation of

 27 *     an unitary similarity transformation of H.  It is to be

 28 *     hoped that the final version of H has many zero subdiagonal

 29 *     entries.

 30 *

 31 *     ******************************************************************

 32 *     WANTT   (input) LOGICAL

 33 *          If .TRUE., then the Hessenberg matrix H is fully updated

 34 *          so that the triangular Schur factor may be

 35 *          computed (in cooperation with the calling subroutine).

 36 *          If .FALSE., then only enough of H is updated to preserve

 37 *          the eigenvalues.

 38 *

 39 *     WANTZ   (input) LOGICAL

 40 *          If .TRUE., then the unitary matrix Z is updated so

 41 *          so that the unitary Schur factor may be computed

 42 *          (in cooperation with the calling subroutine).

 43 *          If .FALSE., then Z is not referenced.

 44 *

 45 *     N       (input) INTEGER

 46 *          The order of the matrix H and (if WANTZ is .TRUE.) the

 47 *          order of the unitary matrix Z.

 48 *

 49 *     KTOP    (input) INTEGER

 50 *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.

 51 *          KBOT and KTOP together determine an isolated block

 52 *          along the diagonal of the Hessenberg matrix.

 53 *

 54 *     KBOT    (input) INTEGER

 55 *          It is assumed without a check that either

 56 *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together

 57 *          determine an isolated block along the diagonal of the

 58 *          Hessenberg matrix.

 59 *

 60 *     NW      (input) INTEGER

 61 *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).

 62 *

 63 *     H       (input/output) COMPLEX*16 array, dimension (LDH,N)

 64 *          On input the initial N-by-N section of H stores the

 65 *          Hessenberg matrix undergoing aggressive early deflation.

 66 *          On output H has been transformed by a unitary

 67 *          similarity transformation, perturbed, and the returned

 68 *          to Hessenberg form that (it is to be hoped) has some

 69 *          zero subdiagonal entries.

 70 *

 71 *     LDH     (input) integer

 72 *          Leading dimension of H just as declared in the calling

 73 *          subroutine.  N .LE. LDH

 74 *

 75 *     ILOZ    (input) INTEGER

 76 *     IHIZ    (input) INTEGER

 77 *          Specify the rows of Z to which transformations must be

 78 *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.

 79 *

 80 *     Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)

 81 *          IF WANTZ is .TRUE., then on output, the unitary

 82 *          similarity transformation mentioned above has been

 83 *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.

 84 *          If WANTZ is .FALSE., then Z is unreferenced.

 85 *

 86 *     LDZ     (input) integer

 87 *          The leading dimension of Z just as declared in the

 88 *          calling subroutine.  1 .LE. LDZ.

 89 *

 90 *     NS      (output) integer

 91 *          The number of unconverged (ie approximate) eigenvalues

 92 *          returned in SR and SI that may be used as shifts by the

 93 *          calling subroutine.

 94 *

 95 *     ND      (output) integer

 96 *          The number of converged eigenvalues uncovered by this

 97 *          subroutine.

 98 *

 99 *     SH      (output) COMPLEX*16 array, dimension KBOT

100 *          On output, approximate eigenvalues that may

101 *          be used for shifts are stored in SH(KBOT-ND-NS+1)

102 *          through SR(KBOT-ND).  Converged eigenvalues are

103 *          stored in SH(KBOT-ND+1) through SH(KBOT).

104 *

105 *     V       (workspace) COMPLEX*16 array, dimension (LDV,NW)

106 *          An NW-by-NW work array.

107 *

108 *     LDV     (input) integer scalar

109 *          The leading dimension of V just as declared in the

110 *          calling subroutine.  NW .LE. LDV

111 *

112 *     NH      (input) integer scalar

113 *          The number of columns of T.  NH.GE.NW.

114 *

115 *     T       (workspace) COMPLEX*16 array, dimension (LDT,NW)

116 *

117 *     LDT     (input) integer

118 *          The leading dimension of T just as declared in the

119 *          calling subroutine.  NW .LE. LDT

120 *

121 *     NV      (input) integer

122 *          The number of rows of work array WV available for

123 *          workspace.  NV.GE.NW.

124 *

125 *     WV      (workspace) COMPLEX*16 array, dimension (LDWV,NW)

126 *

127 *     LDWV    (input) integer

128 *          The leading dimension of W just as declared in the

129 *          calling subroutine.  NW .LE. LDV

130 *

131 *     WORK    (workspace) COMPLEX*16 array, dimension LWORK.

132 *          On exit, WORK(1) is set to an estimate of the optimal value

133 *          of LWORK for the given values of N, NW, KTOP and KBOT.

134 *

135 *     LWORK   (input) integer

136 *          The dimension of the work array WORK.  LWORK = 2*NW

137 *          suffices, but greater efficiency may result from larger

138 *          values of LWORK.

139 *

140 *          If LWORK = -1, then a workspace query is assumed; ZLAQR3

141 *          only estimates the optimal workspace size for the given

142 *          values of N, NW, KTOP and KBOT.  The estimate is returned

143 *          in WORK(1).  No error message related to LWORK is issued

144 *          by XERBLA.  Neither H nor Z are accessed.

145 *

146 *     ================================================================

147 *     Based on contributions by

148 *        Karen Braman and Ralph Byers, Department of Mathematics,

149 *        University of Kansas, USA

150 *

151 *     ================================================================

152 *     .. Parameters ..

153       COMPLEX*16         ZERO, ONE

154       PARAMETER          ( ZERO = ( 0.0d0, 0.0d0 ),

155      $                   ONE = ( 1.0d0, 0.0d0 ) )

156       DOUBLE PRECISION   RZERO, RONE

157       PARAMETER          ( RZERO = 0.0d0, RONE = 1.0d0 )

158 *     ..

159 *     .. Local Scalars ..

160       COMPLEX*16         BETA, CDUM, S, TAU

161       DOUBLE PRECISION   FOO, SAFMAX, SAFMIN, SMLNUM, ULP

162       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,

163      $                   KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,

164      $                   LWKOPT, NMIN

165 *     ..

166 *     .. External Functions ..

167       DOUBLE PRECISION   DLAMCH

168       INTEGER            ILAENV

169       EXTERNAL           DLAMCH, ILAENV

170 *     ..

171 *     .. External Subroutines ..

172       EXTERNAL           DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR,

173      $                   ZLAQR4, ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR

174 *     ..

175 *     .. Intrinsic Functions ..

176       INTRINSIC          ABS, DBLE, DCMPLX, DCONJG, DIMAG, INT, MAX, MIN

177 *     ..

178 *     .. Statement Functions ..

179       DOUBLE PRECISION   CABS1

180 *     ..

181 *     .. Statement Function definitions ..

182       CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) )

183 *     ..

184 *     .. Executable Statements ..

185 *

186 *     ==== Estimate optimal workspace. ====

187 *

188       JW = MIN( NW, KBOT-KTOP+1 )

189       IF( JW.LE.2 ) THEN

190          LWKOPT = 1

191       ELSE

192 *

193 *        ==== Workspace query call to ZGEHRD ====

194 *

195          CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )

196          LWK1 = INT( WORK( 1 ) )

197 *

198 *        ==== Workspace query call to ZUNMHR ====

199 *

200          CALL ZUNMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,

201      $                WORK, -1, INFO )

202          LWK2 = INT( WORK( 1 ) )

203 *

204 *        ==== Workspace query call to ZLAQR4 ====

205 *

206          CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH, 1, JW, V,

207      $                LDV, WORK, -1, INFQR )

208          LWK3 = INT( WORK( 1 ) )

209 *

210 *        ==== Optimal workspace ====

211 *

212          LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 )

213       END IF

214 *

215 *     ==== Quick return in case of workspace query. ====

216 *

217       IF( LWORK.EQ.-1 ) THEN

218          WORK( 1 ) = DCMPLX( LWKOPT, 0 )

219          RETURN

220       END IF

221 *

222 *     ==== Nothing to do ...

223 *     ... for an empty active block ... ====

224       NS = 0

225       ND = 0

226       WORK( 1 ) = ONE

227       IF( KTOP.GT.KBOT )

228      $   RETURN

229 *     ... nor for an empty deflation window. ====

230       IF( NW.LT.1 )

231      $   RETURN

232 *

233 *     ==== Machine constants ====

234 *

235       SAFMIN = DLAMCH( 'SAFE MINIMUM' )

236       SAFMAX = RONE / SAFMIN

237       CALL DLABAD( SAFMIN, SAFMAX )

238       ULP = DLAMCH( 'PRECISION' )

239       SMLNUM = SAFMIN*( DBLE( N ) / ULP )

240 *

241 *     ==== Setup deflation window ====

242 *

243       JW = MIN( NW, KBOT-KTOP+1 )

244       KWTOP = KBOT - JW + 1

245       IF( KWTOP.EQ.KTOP ) THEN

246          S = ZERO

247       ELSE

248          S = H( KWTOP, KWTOP-1 )

249       END IF

250 *

251       IF( KBOT.EQ.KWTOP ) THEN

252 *

253 *        ==== 1-by-1 deflation window: not much to do ====

254 *

255          SH( KWTOP ) = H( KWTOP, KWTOP )

256          NS = 1

257          ND = 0

258          IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,

259      $       KWTOP ) ) ) ) THEN

260             NS = 0

261             ND = 1

262             IF( KWTOP.GT.KTOP )

263      $         H( KWTOP, KWTOP-1 ) = ZERO

264          END IF

265          WORK( 1 ) = ONE

266          RETURN

267       END IF

268 *

269 *     ==== Convert to spike-triangular form.  (In case of a

270 *     .    rare QR failure, this routine continues to do

271 *     .    aggressive early deflation using that part of

272 *     .    the deflation window that converged using INFQR

273 *     .    here and there to keep track.) ====

274 *

275       CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )

276       CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )

277 *

278       CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV )

279       NMIN = ILAENV( 12, 'ZLAQR3', 'SV', JW, 1, JW, LWORK )

280       IF( JW.GT.NMIN ) THEN

281          CALL ZLAQR4( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,

282      $                JW, V, LDV, WORK, LWORK, INFQR )

283       ELSE

284          CALL ZLAHQR( .true., .true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,

285      $                JW, V, LDV, INFQR )

286       END IF

287 *

288 *     ==== Deflation detection loop ====

289 *

290       NS = JW

291       ILST = INFQR + 1

292       DO 10 KNT = INFQR + 1, JW

293 *

294 *        ==== Small spike tip deflation test ====

295 *

296          FOO = CABS1( T( NS, NS ) )

297          IF( FOO.EQ.RZERO )

298      $      FOO = CABS1( S )

299          IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )

300      $        THEN

301 *

302 *           ==== One more converged eigenvalue ====

303 *

304             NS = NS - 1

305          ELSE

306 *

307 *           ==== One undeflatable eigenvalue.  Move it up out of the

308 *           .    way.   (ZTREXC can not fail in this case.) ====

309 *

310             IFST = NS

311             CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )

312             ILST = ILST + 1

313          END IF

314    10 CONTINUE

315 *

316 *        ==== Return to Hessenberg form ====

317 *

318       IF( NS.EQ.0 )

319      $   S = ZERO

320 *

321       IF( NS.LT.JW ) THEN

322 *

323 *        ==== sorting the diagonal of T improves accuracy for

324 *        .    graded matrices.  ====

325 *

326          DO 30 I = INFQR + 1, NS

327             IFST = I

328             DO 20 J = I + 1, NS

329                IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )

330      $            IFST = J

331    20       CONTINUE

332             ILST = I

333             IF( IFST.NE.ILST )

334      $         CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )

335    30    CONTINUE

336       END IF

337 *

338 *     ==== Restore shift/eigenvalue array from T ====

339 *

340       DO 40 I = INFQR + 1, JW

341          SH( KWTOP+I-1 ) = T( I, I )

342    40 CONTINUE

343 *

344 *

345       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN

346          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN

347 *

348 *           ==== Reflect spike back into lower triangle ====

349 *

350             CALL ZCOPY( NS, V, LDV, WORK, 1 )

351             DO 50 I = 1, NS

352                WORK( I ) = DCONJG( WORK( I ) )

353    50       CONTINUE

354             BETA = WORK( 1 )

355             CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU )

356             WORK( 1 ) = ONE

357 *

358             CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )

359 *

360             CALL ZLARF( 'L', NS, JW, WORK, 1, DCONJG( TAU ), T, LDT,

361      $                  WORK( JW+1 ) )

362             CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,

363      $                  WORK( JW+1 ) )

364             CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,

365      $                  WORK( JW+1 ) )

366 *

367             CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),

368      $                   LWORK-JW, INFO )

369          END IF

370 *

371 *        ==== Copy updated reduced window into place ====

372 *

373          IF( KWTOP.GT.1 )

374      $      H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 1, 1 ) )

375          CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )

376          CALL ZCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),

377      $               LDH+1 )

378 *

379 *        ==== Accumulate orthogonal matrix in order update

380 *        .    H and Z, if requested.  ====

381 *

382          IF( NS.GT.1 .AND. S.NE.ZERO )

383      $      CALL ZUNMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,

384      $                   WORK( JW+1 ), LWORK-JW, INFO )

385 *

386 *        ==== Update vertical slab in H ====

387 *

388          IF( WANTT ) THEN

389             LTOP = 1

390          ELSE

391             LTOP = KTOP

392          END IF

393          DO 60 KROW = LTOP, KWTOP - 1, NV

394             KLN = MIN( NV, KWTOP-KROW )

395             CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),

396      $                  LDH, V, LDV, ZERO, WV, LDWV )

397             CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )

398    60    CONTINUE

399 *

400 *        ==== Update horizontal slab in H ====

401 *

402          IF( WANTT ) THEN

403             DO 70 KCOL = KBOT + 1, N, NH

404                KLN = MIN( NH, N-KCOL+1 )

405                CALL ZGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,

406      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )

407                CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),

408      $                      LDH )

409    70       CONTINUE

410          END IF

411 *

412 *        ==== Update vertical slab in Z ====

413 *

414          IF( WANTZ ) THEN

415             DO 80 KROW = ILOZ, IHIZ, NV

416                KLN = MIN( NV, IHIZ-KROW+1 )

417                CALL ZGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),

418      $                     LDZ, V, LDV, ZERO, WV, LDWV )

419                CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),

420      $                      LDZ )

421    80       CONTINUE

422          END IF

423       END IF

424 *

425 *     ==== Return the number of deflations ... ====

426 *

427       ND = JW - NS

428 *

429 *     ==== ... and the number of shifts. (Subtracting

430 *     .    INFQR from the spike length takes care

431 *     .    of the case of a rare QR failure while

432 *     .    calculating eigenvalues of the deflation

433 *     .    window.)  ====

434 *

435       NS = NS - INFQR

436 *

437 *      ==== Return optimal workspace. ====

438 *

439       WORK( 1 ) = DCMPLX( LWKOPT, 0 )

440 *

441 *     ==== End of ZLAQR3 ====

442 *

443       END