1       SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
  2      $                   IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
  3      $                   LDT, NV, WV, LDWV, WORK, LWORK )
  4 *
  5 *  -- LAPACK auxiliary routine (version 3.2.2)                        --
  6 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
  7 *  -- June 2010                                                       --
  8 *
  9 *     .. Scalar Arguments ..
 10       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
 11      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
 12       LOGICAL            WANTT, WANTZ
 13 *     ..
 14 *     .. Array Arguments ..
 15       DOUBLE PRECISION   H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
 16      $                   V( LDV, * ), WORK( * ), WV( LDWV, * ),
 17      $                   Z( LDZ, * )
 18 *     ..
 19 *
 20 *     ******************************************************************
 21 *     Aggressive early deflation:
 22 *
 23 *     This subroutine accepts as input an upper Hessenberg matrix
 24 *     H and performs an orthogonal similarity transformation
 25 *     designed to detect and deflate fully converged eigenvalues from
 26 *     a trailing principal submatrix.  On output H has been over-
 27 *     written by a new Hessenberg matrix that is a perturbation of
 28 *     an orthogonal similarity transformation of H.  It is to be
 29 *     hoped that the final version of H has many zero subdiagonal
 30 *     entries.
 31 *
 32 *     ******************************************************************
 33 *     WANTT   (input) LOGICAL
 34 *          If .TRUE., then the Hessenberg matrix H is fully updated
 35 *          so that the quasi-triangular Schur factor may be
 36 *          computed (in cooperation with the calling subroutine).
 37 *          If .FALSE., then only enough of H is updated to preserve
 38 *          the eigenvalues.
 39 *
 40 *     WANTZ   (input) LOGICAL
 41 *          If .TRUE., then the orthogonal matrix Z is updated so
 42 *          so that the orthogonal Schur factor may be computed
 43 *          (in cooperation with the calling subroutine).
 44 *          If .FALSE., then Z is not referenced.
 45 *
 46 *     N       (input) INTEGER
 47 *          The order of the matrix H and (if WANTZ is .TRUE.) the
 48 *          order of the orthogonal matrix Z.
 49 *
 50 *     KTOP    (input) INTEGER
 51 *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
 52 *          KBOT and KTOP together determine an isolated block
 53 *          along the diagonal of the Hessenberg matrix.
 54 *
 55 *     KBOT    (input) INTEGER
 56 *          It is assumed without a check that either
 57 *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
 58 *          determine an isolated block along the diagonal of the
 59 *          Hessenberg matrix.
 60 *
 61 *     NW      (input) INTEGER
 62 *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
 63 *
 64 *     H       (input/output) DOUBLE PRECISION array, dimension (LDH,N)
 65 *          On input the initial N-by-N section of H stores the
 66 *          Hessenberg matrix undergoing aggressive early deflation.
 67 *          On output H has been transformed by an orthogonal
 68 *          similarity transformation, perturbed, and the returned
 69 *          to Hessenberg form that (it is to be hoped) has some
 70 *          zero subdiagonal entries.
 71 *
 72 *     LDH     (input) integer
 73 *          Leading dimension of H just as declared in the calling
 74 *          subroutine.  N .LE. LDH
 75 *
 76 *     ILOZ    (input) INTEGER
 77 *     IHIZ    (input) INTEGER
 78 *          Specify the rows of Z to which transformations must be
 79 *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
 80 *
 81 *     Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
 82 *          IF WANTZ is .TRUE., then on output, the orthogonal
 83 *          similarity transformation mentioned above has been
 84 *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
 85 *          If WANTZ is .FALSE., then Z is unreferenced.
 86 *
 87 *     LDZ     (input) integer
 88 *          The leading dimension of Z just as declared in the
 89 *          calling subroutine.  1 .LE. LDZ.
 90 *
 91 *     NS      (output) integer
 92 *          The number of unconverged (ie approximate) eigenvalues
 93 *          returned in SR and SI that may be used as shifts by the
 94 *          calling subroutine.
 95 *
 96 *     ND      (output) integer
 97 *          The number of converged eigenvalues uncovered by this
 98 *          subroutine.
 99 *
100 *     SR      (output) DOUBLE PRECISION array, dimension (KBOT)
101 *     SI      (output) DOUBLE PRECISION array, dimension (KBOT)
102 *          On output, the real and imaginary parts of approximate
103 *          eigenvalues that may be used for shifts are stored in
104 *          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
105 *          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
106 *          The real and imaginary parts of converged eigenvalues
107 *          are stored in SR(KBOT-ND+1) through SR(KBOT) and
108 *          SI(KBOT-ND+1) through SI(KBOT), respectively.
109 *
110 *     V       (workspace) DOUBLE PRECISION array, dimension (LDV,NW)
111 *          An NW-by-NW work array.
112 *
113 *     LDV     (input) integer scalar
114 *          The leading dimension of V just as declared in the
115 *          calling subroutine.  NW .LE. LDV
116 *
117 *     NH      (input) integer scalar
118 *          The number of columns of T.  NH.GE.NW.
119 *
120 *     T       (workspace) DOUBLE PRECISION array, dimension (LDT,NW)
121 *
122 *     LDT     (input) integer
123 *          The leading dimension of T just as declared in the
124 *          calling subroutine.  NW .LE. LDT
125 *
126 *     NV      (input) integer
127 *          The number of rows of work array WV available for
128 *          workspace.  NV.GE.NW.
129 *
130 *     WV      (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)
131 *
132 *     LDWV    (input) integer
133 *          The leading dimension of W just as declared in the
134 *          calling subroutine.  NW .LE. LDV
135 *
136 *     WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
137 *          On exit, WORK(1) is set to an estimate of the optimal value
138 *          of LWORK for the given values of N, NW, KTOP and KBOT.
139 *
140 *     LWORK   (input) integer
141 *          The dimension of the work array WORK.  LWORK = 2*NW
142 *          suffices, but greater efficiency may result from larger
143 *          values of LWORK.
144 *
145 *          If LWORK = -1, then a workspace query is assumed; DLAQR3
146 *          only estimates the optimal workspace size for the given
147 *          values of N, NW, KTOP and KBOT.  The estimate is returned
148 *          in WORK(1).  No error message related to LWORK is issued
149 *          by XERBLA.  Neither H nor Z are accessed.
150 *
151 *     ================================================================
152 *     Based on contributions by
153 *        Karen Braman and Ralph Byers, Department of Mathematics,
154 *        University of Kansas, USA
155 *
156 *     ================================================================
157 *     .. Parameters ..
158       DOUBLE PRECISION   ZERO, ONE
159       PARAMETER          ( ZERO = 0.0d0, ONE = 1.0d0 )
160 *     ..
161 *     .. Local Scalars ..
162       DOUBLE PRECISION   AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
163      $                   SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
164       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
165      $                   KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
166      $                   LWKOPT, NMIN
167       LOGICAL            BULGE, SORTED
168 *     ..
169 *     .. External Functions ..
170       DOUBLE PRECISION   DLAMCH
171       INTEGER            ILAENV
172       EXTERNAL           DLAMCH, ILAENV
173 *     ..
174 *     .. External Subroutines ..
175       EXTERNAL           DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,
176      $                   DLANV2, DLAQR4, DLARF, DLARFG, DLASET, DORMHR,
177      $                   DTREXC
178 *     ..
179 *     .. Intrinsic Functions ..
180       INTRINSIC          ABSDBLEINTMAXMINSQRT
181 *     ..
182 *     .. Executable Statements ..
183 *
184 *     ==== Estimate optimal workspace. ====
185 *
186       JW = MIN( NW, KBOT-KTOP+1 )
187       IF( JW.LE.2 ) THEN
188          LWKOPT = 1
189       ELSE
190 *
191 *        ==== Workspace query call to DGEHRD ====
192 *
193          CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
194          LWK1 = INT( WORK( 1 ) )
195 *
196 *        ==== Workspace query call to DORMHR ====
197 *
198          CALL DORMHR( 'R''N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
199      $                WORK, -1, INFO )
200          LWK2 = INT( WORK( 1 ) )
201 *
202 *        ==== Workspace query call to DLAQR4 ====
203 *
204          CALL DLAQR4( .true..true., JW, 1, JW, T, LDT, SR, SI, 1, JW,
205      $                V, LDV, WORK, -1, INFQR )
206          LWK3 = INT( WORK( 1 ) )
207 *
208 *        ==== Optimal workspace ====
209 *
210          LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 )
211       END IF
212 *
213 *     ==== Quick return in case of workspace query. ====
214 *
215       IF( LWORK.EQ.-1 ) THEN
216          WORK( 1 ) = DBLE( LWKOPT )
217          RETURN
218       END IF
219 *
220 *     ==== Nothing to do ...
221 *     ... for an empty active block ... ====
222       NS = 0
223       ND = 0
224       WORK( 1 ) = ONE
225       IF( KTOP.GT.KBOT )
226      $   RETURN
227 *     ... nor for an empty deflation window. ====
228       IF( NW.LT.1 )
229      $   RETURN
230 *
231 *     ==== Machine constants ====
232 *
233       SAFMIN = DLAMCH( 'SAFE MINIMUM' )
234       SAFMAX = ONE / SAFMIN
235       CALL DLABAD( SAFMIN, SAFMAX )
236       ULP = DLAMCH( 'PRECISION' )
237       SMLNUM = SAFMIN*DBLE( N ) / ULP )
238 *
239 *     ==== Setup deflation window ====
240 *
241       JW = MIN( NW, KBOT-KTOP+1 )
242       KWTOP = KBOT - JW + 1
243       IF( KWTOP.EQ.KTOP ) THEN
244          S = ZERO
245       ELSE
246          S = H( KWTOP, KWTOP-1 )
247       END IF
248 *
249       IF( KBOT.EQ.KWTOP ) THEN
250 *
251 *        ==== 1-by-1 deflation window: not much to do ====
252 *
253          SR( KWTOP ) = H( KWTOP, KWTOP )
254          SI( KWTOP ) = ZERO
255          NS = 1
256          ND = 0
257          IFABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
258      $        THEN
259             NS = 0
260             ND = 1
261             IF( KWTOP.GT.KTOP )
262      $         H( KWTOP, KWTOP-1 ) = ZERO
263          END IF
264          WORK( 1 ) = ONE
265          RETURN
266       END IF
267 *
268 *     ==== Convert to spike-triangular form.  (In case of a
269 *     .    rare QR failure, this routine continues to do
270 *     .    aggressive early deflation using that part of
271 *     .    the deflation window that converged using INFQR
272 *     .    here and there to keep track.) ====
273 *
274       CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
275       CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 21 ), LDT+1 )
276 *
277       CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
278       NMIN = ILAENV( 12'DLAQR3''SV', JW, 1, JW, LWORK )
279       IF( JW.GT.NMIN ) THEN
280          CALL DLAQR4( .true..true., JW, 1, JW, T, LDT, SR( KWTOP ),
281      $                SI( KWTOP ), 1, JW, V, LDV, WORK, LWORK, INFQR )
282       ELSE
283          CALL DLAHQR( .true..true., JW, 1, JW, T, LDT, SR( KWTOP ),
284      $                SI( KWTOP ), 1, JW, V, LDV, INFQR )
285       END IF
286 *
287 *     ==== DTREXC needs a clean margin near the diagonal ====
288 *
289       DO 10 J = 1, JW - 3
290          T( J+2, J ) = ZERO
291          T( J+3, J ) = ZERO
292    10 CONTINUE
293       IF( JW.GT.2 )
294      $   T( JW, JW-2 ) = ZERO
295 *
296 *     ==== Deflation detection loop ====
297 *
298       NS = JW
299       ILST = INFQR + 1
300    20 CONTINUE
301       IF( ILST.LE.NS ) THEN
302          IF( NS.EQ.1 ) THEN
303             BULGE = .FALSE.
304          ELSE
305             BULGE = T( NS, NS-1 ).NE.ZERO
306          END IF
307 *
308 *        ==== Small spike tip test for deflation ====
309 *
310          IF.NOT.BULGE ) THEN
311 *
312 *           ==== Real eigenvalue ====
313 *
314             FOO = ABS( T( NS, NS ) )
315             IF( FOO.EQ.ZERO )
316      $         FOO = ABS( S )
317             IFABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
318 *
319 *              ==== Deflatable ====
320 *
321                NS = NS - 1
322             ELSE
323 *
324 *              ==== Undeflatable.   Move it up out of the way.
325 *              .    (DTREXC can not fail in this case.) ====
326 *
327                IFST = NS
328                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
329      $                      INFO )
330                ILST = ILST + 1
331             END IF
332          ELSE
333 *
334 *           ==== Complex conjugate pair ====
335 *
336             FOO = ABS( T( NS, NS ) ) + SQRTABS( T( NS, NS-1 ) ) )*
337      $            SQRTABS( T( NS-1, NS ) ) )
338             IF( FOO.EQ.ZERO )
339      $         FOO = ABS( S )
340             IFMAXABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
341      $          MAX( SMLNUM, ULP*FOO ) ) THEN
342 *
343 *              ==== Deflatable ====
344 *
345                NS = NS - 2
346             ELSE
347 *
348 *              ==== Undeflatable. Move them up out of the way.
349 *              .    Fortunately, DTREXC does the right thing with
350 *              .    ILST in case of a rare exchange failure. ====
351 *
352                IFST = NS
353                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
354      $                      INFO )
355                ILST = ILST + 2
356             END IF
357          END IF
358 *
359 *        ==== End deflation detection loop ====
360 *
361          GO TO 20
362       END IF
363 *
364 *        ==== Return to Hessenberg form ====
365 *
366       IF( NS.EQ.0 )
367      $   S = ZERO
368 *
369       IF( NS.LT.JW ) THEN
370 *
371 *        ==== sorting diagonal blocks of T improves accuracy for
372 *        .    graded matrices.  Bubble sort deals well with
373 *        .    exchange failures. ====
374 *
375          SORTED = .false.
376          I = NS + 1
377    30    CONTINUE
378          IF( SORTED )
379      $      GO TO 50
380          SORTED = .true.
381 *
382          KEND = I - 1
383          I = INFQR + 1
384          IF( I.EQ.NS ) THEN
385             K = I + 1
386          ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
387             K = I + 1
388          ELSE
389             K = I + 2
390          END IF
391    40    CONTINUE
392          IF( K.LE.KEND ) THEN
393             IF( K.EQ.I+1 ) THEN
394                EVI = ABS( T( I, I ) )
395             ELSE
396                EVI = ABS( T( I, I ) ) + SQRTABS( T( I+1, I ) ) )*
397      $               SQRTABS( T( I, I+1 ) ) )
398             END IF
399 *
400             IF( K.EQ.KEND ) THEN
401                EVK = ABS( T( K, K ) )
402             ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
403                EVK = ABS( T( K, K ) )
404             ELSE
405                EVK = ABS( T( K, K ) ) + SQRTABS( T( K+1, K ) ) )*
406      $               SQRTABS( T( K, K+1 ) ) )
407             END IF
408 *
409             IF( EVI.GE.EVK ) THEN
410                I = K
411             ELSE
412                SORTED = .false.
413                IFST = I
414                ILST = K
415                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
416      $                      INFO )
417                IF( INFO.EQ.0 ) THEN
418                   I = ILST
419                ELSE
420                   I = K
421                END IF
422             END IF
423             IF( I.EQ.KEND ) THEN
424                K = I + 1
425             ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
426                K = I + 1
427             ELSE
428                K = I + 2
429             END IF
430             GO TO 40
431          END IF
432          GO TO 30
433    50    CONTINUE
434       END IF
435 *
436 *     ==== Restore shift/eigenvalue array from T ====
437 *
438       I = JW
439    60 CONTINUE
440       IF( I.GE.INFQR+1 ) THEN
441          IF( I.EQ.INFQR+1 ) THEN
442             SR( KWTOP+I-1 ) = T( I, I )
443             SI( KWTOP+I-1 ) = ZERO
444             I = I - 1
445          ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
446             SR( KWTOP+I-1 ) = T( I, I )
447             SI( KWTOP+I-1 ) = ZERO
448             I = I - 1
449          ELSE
450             AA = T( I-1, I-1 )
451             CC = T( I, I-1 )
452             BB = T( I-1, I )
453             DD = T( I, I )
454             CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
455      $                   SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
456      $                   SI( KWTOP+I-1 ), CS, SN )
457             I = I - 2
458          END IF
459          GO TO 60
460       END IF
461 *
462       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
463          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
464 *
465 *           ==== Reflect spike back into lower triangle ====
466 *
467             CALL DCOPY( NS, V, LDV, WORK, 1 )
468             BETA = WORK( 1 )
469             CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )
470             WORK( 1 ) = ONE
471 *
472             CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 31 ), LDT )
473 *
474             CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
475      $                  WORK( JW+1 ) )
476             CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
477      $                  WORK( JW+1 ) )
478             CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
479      $                  WORK( JW+1 ) )
480 *
481             CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
482      $                   LWORK-JW, INFO )
483          END IF
484 *
485 *        ==== Copy updated reduced window into place ====
486 *
487          IF( KWTOP.GT.1 )
488      $      H( KWTOP, KWTOP-1 ) = S*V( 11 )
489          CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
490          CALL DCOPY( JW-1, T( 21 ), LDT+1, H( KWTOP+1, KWTOP ),
491      $               LDH+1 )
492 *
493 *        ==== Accumulate orthogonal matrix in order update
494 *        .    H and Z, if requested.  ====
495 *
496          IF( NS.GT.1 .AND. S.NE.ZERO )
497      $      CALL DORMHR( 'R''N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
498      $                   WORK( JW+1 ), LWORK-JW, INFO )
499 *
500 *        ==== Update vertical slab in H ====
501 *
502          IF( WANTT ) THEN
503             LTOP = 1
504          ELSE
505             LTOP = KTOP
506          END IF
507          DO 70 KROW = LTOP, KWTOP - 1, NV
508             KLN = MIN( NV, KWTOP-KROW )
509             CALL DGEMM( 'N''N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
510      $                  LDH, V, LDV, ZERO, WV, LDWV )
511             CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
512    70    CONTINUE
513 *
514 *        ==== Update horizontal slab in H ====
515 *
516          IF( WANTT ) THEN
517             DO 80 KCOL = KBOT + 1, N, NH
518                KLN = MIN( NH, N-KCOL+1 )
519                CALL DGEMM( 'C''N', JW, KLN, JW, ONE, V, LDV,
520      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
521                CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
522      $                      LDH )
523    80       CONTINUE
524          END IF
525 *
526 *        ==== Update vertical slab in Z ====
527 *
528          IF( WANTZ ) THEN
529             DO 90 KROW = ILOZ, IHIZ, NV
530                KLN = MIN( NV, IHIZ-KROW+1 )
531                CALL DGEMM( 'N''N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
532      $                     LDZ, V, LDV, ZERO, WV, LDWV )
533                CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
534      $                      LDZ )
535    90       CONTINUE
536          END IF
537       END IF
538 *
539 *     ==== Return the number of deflations ... ====
540 *
541       ND = JW - NS
542 *
543 *     ==== ... and the number of shifts. (Subtracting
544 *     .    INFQR from the spike length takes care
545 *     .    of the case of a rare QR failure while
546 *     .    calculating eigenvalues of the deflation
547 *     .    window.)  ====
548 *
549       NS = NS - INFQR
550 *
551 *      ==== Return optimal workspace. ====
552 *
553       WORK( 1 ) = DBLE( LWKOPT )
554 *
555 *     ==== End of DLAQR3 ====
556 *
557       END