1       SUBROUTINE ZLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
  2      $                   IHIZ, Z, LDZ, NS, ND, SH, V, LDV, NH, T, LDT,
  3      $                   NV, WV, LDWV, WORK, LWORK )
  4 *
  5 *  -- LAPACK auxiliary routine (version 3.2.1)                        --
  6 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
  7 *  -- April 2009                                                      --
  8 *
  9 *     .. Scalar Arguments ..
 10       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
 11      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
 12       LOGICAL            WANTT, WANTZ
 13 *     ..
 14 *     .. Array Arguments ..
 15       COMPLEX*16         H( LDH, * ), SH( * ), T( LDT, * ), V( LDV, * ),
 16      $                   WORK( * ), WV( LDWV, * ), Z( LDZ, * )
 17 *     ..
 18 *
 19 *     ******************************************************************
 20 *     Aggressive early deflation:
 21 *
 22 *     This subroutine accepts as input an upper Hessenberg matrix
 23 *     H and performs an unitary similarity transformation
 24 *     designed to detect and deflate fully converged eigenvalues from
 25 *     a trailing principal submatrix.  On output H has been over-
 26 *     written by a new Hessenberg matrix that is a perturbation of
 27 *     an unitary similarity transformation of H.  It is to be
 28 *     hoped that the final version of H has many zero subdiagonal
 29 *     entries.
 30 *
 31 *     ******************************************************************
 32 *     WANTT   (input) LOGICAL
 33 *          If .TRUE., then the Hessenberg matrix H is fully updated
 34 *          so that the triangular Schur factor may be
 35 *          computed (in cooperation with the calling subroutine).
 36 *          If .FALSE., then only enough of H is updated to preserve
 37 *          the eigenvalues.
 38 *
 39 *     WANTZ   (input) LOGICAL
 40 *          If .TRUE., then the unitary matrix Z is updated so
 41 *          so that the unitary Schur factor may be computed
 42 *          (in cooperation with the calling subroutine).
 43 *          If .FALSE., then Z is not referenced.
 44 *
 45 *     N       (input) INTEGER
 46 *          The order of the matrix H and (if WANTZ is .TRUE.) the
 47 *          order of the unitary matrix Z.
 48 *
 49 *     KTOP    (input) INTEGER
 50 *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
 51 *          KBOT and KTOP together determine an isolated block
 52 *          along the diagonal of the Hessenberg matrix.
 53 *
 54 *     KBOT    (input) INTEGER
 55 *          It is assumed without a check that either
 56 *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
 57 *          determine an isolated block along the diagonal of the
 58 *          Hessenberg matrix.
 59 *
 60 *     NW      (input) INTEGER
 61 *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
 62 *
 63 *     H       (input/output) COMPLEX*16 array, dimension (LDH,N)
 64 *          On input the initial N-by-N section of H stores the
 65 *          Hessenberg matrix undergoing aggressive early deflation.
 66 *          On output H has been transformed by a unitary
 67 *          similarity transformation, perturbed, and the returned
 68 *          to Hessenberg form that (it is to be hoped) has some
 69 *          zero subdiagonal entries.
 70 *
 71 *     LDH     (input) integer
 72 *          Leading dimension of H just as declared in the calling
 73 *          subroutine.  N .LE. LDH
 74 *
 75 *     ILOZ    (input) INTEGER
 76 *     IHIZ    (input) INTEGER
 77 *          Specify the rows of Z to which transformations must be
 78 *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
 79 *
 80 *     Z       (input/output) COMPLEX*16 array, dimension (LDZ,N)
 81 *          IF WANTZ is .TRUE., then on output, the unitary
 82 *          similarity transformation mentioned above has been
 83 *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
 84 *          If WANTZ is .FALSE., then Z is unreferenced.
 85 *
 86 *     LDZ     (input) integer
 87 *          The leading dimension of Z just as declared in the
 88 *          calling subroutine.  1 .LE. LDZ.
 89 *
 90 *     NS      (output) integer
 91 *          The number of unconverged (ie approximate) eigenvalues
 92 *          returned in SR and SI that may be used as shifts by the
 93 *          calling subroutine.
 94 *
 95 *     ND      (output) integer
 96 *          The number of converged eigenvalues uncovered by this
 97 *          subroutine.
 98 *
 99 *     SH      (output) COMPLEX*16 array, dimension KBOT
100 *          On output, approximate eigenvalues that may
101 *          be used for shifts are stored in SH(KBOT-ND-NS+1)
102 *          through SR(KBOT-ND).  Converged eigenvalues are
103 *          stored in SH(KBOT-ND+1) through SH(KBOT).
104 *
105 *     V       (workspace) COMPLEX*16 array, dimension (LDV,NW)
106 *          An NW-by-NW work array.
107 *
108 *     LDV     (input) integer scalar
109 *          The leading dimension of V just as declared in the
110 *          calling subroutine.  NW .LE. LDV
111 *
112 *     NH      (input) integer scalar
113 *          The number of columns of T.  NH.GE.NW.
114 *
115 *     T       (workspace) COMPLEX*16 array, dimension (LDT,NW)
116 *
117 *     LDT     (input) integer
118 *          The leading dimension of T just as declared in the
119 *          calling subroutine.  NW .LE. LDT
120 *
121 *     NV      (input) integer
122 *          The number of rows of work array WV available for
123 *          workspace.  NV.GE.NW.
124 *
125 *     WV      (workspace) COMPLEX*16 array, dimension (LDWV,NW)
126 *
127 *     LDWV    (input) integer
128 *          The leading dimension of W just as declared in the
129 *          calling subroutine.  NW .LE. LDV
130 *
131 *     WORK    (workspace) COMPLEX*16 array, dimension LWORK.
132 *          On exit, WORK(1) is set to an estimate of the optimal value
133 *          of LWORK for the given values of N, NW, KTOP and KBOT.
134 *
135 *     LWORK   (input) integer
136 *          The dimension of the work array WORK.  LWORK = 2*NW
137 *          suffices, but greater efficiency may result from larger
138 *          values of LWORK.
139 *
140 *          If LWORK = -1, then a workspace query is assumed; ZLAQR3
141 *          only estimates the optimal workspace size for the given
142 *          values of N, NW, KTOP and KBOT.  The estimate is returned
143 *          in WORK(1).  No error message related to LWORK is issued
144 *          by XERBLA.  Neither H nor Z are accessed.
145 *
146 *     ================================================================
147 *     Based on contributions by
148 *        Karen Braman and Ralph Byers, Department of Mathematics,
149 *        University of Kansas, USA
150 *
151 *     ================================================================
152 *     .. Parameters ..
153       COMPLEX*16         ZERO, ONE
154       PARAMETER          ( ZERO = ( 0.0d00.0d0 ),
155      $                   ONE = ( 1.0d00.0d0 ) )
156       DOUBLE PRECISION   RZERO, RONE
157       PARAMETER          ( RZERO = 0.0d0, RONE = 1.0d0 )
158 *     ..
159 *     .. Local Scalars ..
160       COMPLEX*16         BETA, CDUM, S, TAU
161       DOUBLE PRECISION   FOO, SAFMAX, SAFMIN, SMLNUM, ULP
162       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, KCOL, KLN,
163      $                   KNT, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
164      $                   LWKOPT, NMIN
165 *     ..
166 *     .. External Functions ..
167       DOUBLE PRECISION   DLAMCH
168       INTEGER            ILAENV
169       EXTERNAL           DLAMCH, ILAENV
170 *     ..
171 *     .. External Subroutines ..
172       EXTERNAL           DLABAD, ZCOPY, ZGEHRD, ZGEMM, ZLACPY, ZLAHQR,
173      $                   ZLAQR4, ZLARF, ZLARFG, ZLASET, ZTREXC, ZUNMHR
174 *     ..
175 *     .. Intrinsic Functions ..
176       INTRINSIC          ABSDBLEDCMPLXDCONJGDIMAGINTMAXMIN
177 *     ..
178 *     .. Statement Functions ..
179       DOUBLE PRECISION   CABS1
180 *     ..
181 *     .. Statement Function definitions ..
182       CABS1( CDUM ) = ABSDBLE( CDUM ) ) + ABSDIMAG( CDUM ) )
183 *     ..
184 *     .. Executable Statements ..
185 *
186 *     ==== Estimate optimal workspace. ====
187 *
188       JW = MIN( NW, KBOT-KTOP+1 )
189       IF( JW.LE.2 ) THEN
190          LWKOPT = 1
191       ELSE
192 *
193 *        ==== Workspace query call to ZGEHRD ====
194 *
195          CALL ZGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
196          LWK1 = INT( WORK( 1 ) )
197 *
198 *        ==== Workspace query call to ZUNMHR ====
199 *
200          CALL ZUNMHR( 'R''N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
201      $                WORK, -1, INFO )
202          LWK2 = INT( WORK( 1 ) )
203 *
204 *        ==== Workspace query call to ZLAQR4 ====
205 *
206          CALL ZLAQR4( .true..true., JW, 1, JW, T, LDT, SH, 1, JW, V,
207      $                LDV, WORK, -1, INFQR )
208          LWK3 = INT( WORK( 1 ) )
209 *
210 *        ==== Optimal workspace ====
211 *
212          LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 )
213       END IF
214 *
215 *     ==== Quick return in case of workspace query. ====
216 *
217       IF( LWORK.EQ.-1 ) THEN
218          WORK( 1 ) = DCMPLX( LWKOPT, 0 )
219          RETURN
220       END IF
221 *
222 *     ==== Nothing to do ...
223 *     ... for an empty active block ... ====
224       NS = 0
225       ND = 0
226       WORK( 1 ) = ONE
227       IF( KTOP.GT.KBOT )
228      $   RETURN
229 *     ... nor for an empty deflation window. ====
230       IF( NW.LT.1 )
231      $   RETURN
232 *
233 *     ==== Machine constants ====
234 *
235       SAFMIN = DLAMCH( 'SAFE MINIMUM' )
236       SAFMAX = RONE / SAFMIN
237       CALL DLABAD( SAFMIN, SAFMAX )
238       ULP = DLAMCH( 'PRECISION' )
239       SMLNUM = SAFMIN*DBLE( N ) / ULP )
240 *
241 *     ==== Setup deflation window ====
242 *
243       JW = MIN( NW, KBOT-KTOP+1 )
244       KWTOP = KBOT - JW + 1
245       IF( KWTOP.EQ.KTOP ) THEN
246          S = ZERO
247       ELSE
248          S = H( KWTOP, KWTOP-1 )
249       END IF
250 *
251       IF( KBOT.EQ.KWTOP ) THEN
252 *
253 *        ==== 1-by-1 deflation window: not much to do ====
254 *
255          SH( KWTOP ) = H( KWTOP, KWTOP )
256          NS = 1
257          ND = 0
258          IF( CABS1( S ).LE.MAX( SMLNUM, ULP*CABS1( H( KWTOP,
259      $       KWTOP ) ) ) ) THEN
260             NS = 0
261             ND = 1
262             IF( KWTOP.GT.KTOP )
263      $         H( KWTOP, KWTOP-1 ) = ZERO
264          END IF
265          WORK( 1 ) = ONE
266          RETURN
267       END IF
268 *
269 *     ==== Convert to spike-triangular form.  (In case of a
270 *     .    rare QR failure, this routine continues to do
271 *     .    aggressive early deflation using that part of
272 *     .    the deflation window that converged using INFQR
273 *     .    here and there to keep track.) ====
274 *
275       CALL ZLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
276       CALL ZCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 21 ), LDT+1 )
277 *
278       CALL ZLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
279       NMIN = ILAENV( 12'ZLAQR3''SV', JW, 1, JW, LWORK )
280       IF( JW.GT.NMIN ) THEN
281          CALL ZLAQR4( .true..true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
282      $                JW, V, LDV, WORK, LWORK, INFQR )
283       ELSE
284          CALL ZLAHQR( .true..true., JW, 1, JW, T, LDT, SH( KWTOP ), 1,
285      $                JW, V, LDV, INFQR )
286       END IF
287 *
288 *     ==== Deflation detection loop ====
289 *
290       NS = JW
291       ILST = INFQR + 1
292       DO 10 KNT = INFQR + 1, JW
293 *
294 *        ==== Small spike tip deflation test ====
295 *
296          FOO = CABS1( T( NS, NS ) )
297          IF( FOO.EQ.RZERO )
298      $      FOO = CABS1( S )
299          IF( CABS1( S )*CABS1( V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) )
300      $        THEN
301 *
302 *           ==== One more converged eigenvalue ====
303 *
304             NS = NS - 1
305          ELSE
306 *
307 *           ==== One undeflatable eigenvalue.  Move it up out of the
308 *           .    way.   (ZTREXC can not fail in this case.) ====
309 *
310             IFST = NS
311             CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
312             ILST = ILST + 1
313          END IF
314    10 CONTINUE
315 *
316 *        ==== Return to Hessenberg form ====
317 *
318       IF( NS.EQ.0 )
319      $   S = ZERO
320 *
321       IF( NS.LT.JW ) THEN
322 *
323 *        ==== sorting the diagonal of T improves accuracy for
324 *        .    graded matrices.  ====
325 *
326          DO 30 I = INFQR + 1, NS
327             IFST = I
328             DO 20 J = I + 1, NS
329                IF( CABS1( T( J, J ) ).GT.CABS1( T( IFST, IFST ) ) )
330      $            IFST = J
331    20       CONTINUE
332             ILST = I
333             IF( IFST.NE.ILST )
334      $         CALL ZTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, INFO )
335    30    CONTINUE
336       END IF
337 *
338 *     ==== Restore shift/eigenvalue array from T ====
339 *
340       DO 40 I = INFQR + 1, JW
341          SH( KWTOP+I-1 ) = T( I, I )
342    40 CONTINUE
343 *
344 *
345       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
346          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
347 *
348 *           ==== Reflect spike back into lower triangle ====
349 *
350             CALL ZCOPY( NS, V, LDV, WORK, 1 )
351             DO 50 I = 1, NS
352                WORK( I ) = DCONJG( WORK( I ) )
353    50       CONTINUE
354             BETA = WORK( 1 )
355             CALL ZLARFG( NS, BETA, WORK( 2 ), 1, TAU )
356             WORK( 1 ) = ONE
357 *
358             CALL ZLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 31 ), LDT )
359 *
360             CALL ZLARF( 'L', NS, JW, WORK, 1DCONJG( TAU ), T, LDT,
361      $                  WORK( JW+1 ) )
362             CALL ZLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
363      $                  WORK( JW+1 ) )
364             CALL ZLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
365      $                  WORK( JW+1 ) )
366 *
367             CALL ZGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
368      $                   LWORK-JW, INFO )
369          END IF
370 *
371 *        ==== Copy updated reduced window into place ====
372 *
373          IF( KWTOP.GT.1 )
374      $      H( KWTOP, KWTOP-1 ) = S*DCONJG( V( 11 ) )
375          CALL ZLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
376          CALL ZCOPY( JW-1, T( 21 ), LDT+1, H( KWTOP+1, KWTOP ),
377      $               LDH+1 )
378 *
379 *        ==== Accumulate orthogonal matrix in order update
380 *        .    H and Z, if requested.  ====
381 *
382          IF( NS.GT.1 .AND. S.NE.ZERO )
383      $      CALL ZUNMHR( 'R''N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
384      $                   WORK( JW+1 ), LWORK-JW, INFO )
385 *
386 *        ==== Update vertical slab in H ====
387 *
388          IF( WANTT ) THEN
389             LTOP = 1
390          ELSE
391             LTOP = KTOP
392          END IF
393          DO 60 KROW = LTOP, KWTOP - 1, NV
394             KLN = MIN( NV, KWTOP-KROW )
395             CALL ZGEMM( 'N''N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
396      $                  LDH, V, LDV, ZERO, WV, LDWV )
397             CALL ZLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
398    60    CONTINUE
399 *
400 *        ==== Update horizontal slab in H ====
401 *
402          IF( WANTT ) THEN
403             DO 70 KCOL = KBOT + 1, N, NH
404                KLN = MIN( NH, N-KCOL+1 )
405                CALL ZGEMM( 'C''N', JW, KLN, JW, ONE, V, LDV,
406      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
407                CALL ZLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
408      $                      LDH )
409    70       CONTINUE
410          END IF
411 *
412 *        ==== Update vertical slab in Z ====
413 *
414          IF( WANTZ ) THEN
415             DO 80 KROW = ILOZ, IHIZ, NV
416                KLN = MIN( NV, IHIZ-KROW+1 )
417                CALL ZGEMM( 'N''N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
418      $                     LDZ, V, LDV, ZERO, WV, LDWV )
419                CALL ZLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
420      $                      LDZ )
421    80       CONTINUE
422          END IF
423       END IF
424 *
425 *     ==== Return the number of deflations ... ====
426 *
427       ND = JW - NS
428 *
429 *     ==== ... and the number of shifts. (Subtracting
430 *     .    INFQR from the spike length takes care
431 *     .    of the case of a rare QR failure while
432 *     .    calculating eigenvalues of the deflation
433 *     .    window.)  ====
434 *
435       NS = NS - INFQR
436 *
437 *      ==== Return optimal workspace. ====
438 *
439       WORK( 1 ) = DCMPLX( LWKOPT, 0 )
440 *
441 *     ==== End of ZLAQR3 ====
442 *
443       END