1       SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
  2      $                   IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
  3      $                   LDT, NV, WV, LDWV, WORK, LWORK )
  4 *
  5 *  -- LAPACK auxiliary routine (version 3.2.2)                        --
  6 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
  7 *  -- June 2010                                                       --
  8 *
  9 *     .. Scalar Arguments ..
 10       INTEGER            IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
 11      $                   LDZ, LWORK, N, ND, NH, NS, NV, NW
 12       LOGICAL            WANTT, WANTZ
 13 *     ..
 14 *     .. Array Arguments ..
 15       DOUBLE PRECISION   H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
 16      $                   V( LDV, * ), WORK( * ), WV( LDWV, * ),
 17      $                   Z( LDZ, * )
 18 *     ..
 19 *
 20 *     This subroutine is identical to DLAQR3 except that it avoids
 21 *     recursion by calling DLAHQR instead of DLAQR4.
 22 *
 23 *
 24 *     ******************************************************************
 25 *     Aggressive early deflation:
 26 *
 27 *     This subroutine accepts as input an upper Hessenberg matrix
 28 *     H and performs an orthogonal similarity transformation
 29 *     designed to detect and deflate fully converged eigenvalues from
 30 *     a trailing principal submatrix.  On output H has been over-
 31 *     written by a new Hessenberg matrix that is a perturbation of
 32 *     an orthogonal similarity transformation of H.  It is to be
 33 *     hoped that the final version of H has many zero subdiagonal
 34 *     entries.
 35 *
 36 *     ******************************************************************
 37 *     WANTT   (input) LOGICAL
 38 *          If .TRUE., then the Hessenberg matrix H is fully updated
 39 *          so that the quasi-triangular Schur factor may be
 40 *          computed (in cooperation with the calling subroutine).
 41 *          If .FALSE., then only enough of H is updated to preserve
 42 *          the eigenvalues.
 43 *
 44 *     WANTZ   (input) LOGICAL
 45 *          If .TRUE., then the orthogonal matrix Z is updated so
 46 *          so that the orthogonal Schur factor may be computed
 47 *          (in cooperation with the calling subroutine).
 48 *          If .FALSE., then Z is not referenced.
 49 *
 50 *     N       (input) INTEGER
 51 *          The order of the matrix H and (if WANTZ is .TRUE.) the
 52 *          order of the orthogonal matrix Z.
 53 *
 54 *     KTOP    (input) INTEGER
 55 *          It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
 56 *          KBOT and KTOP together determine an isolated block
 57 *          along the diagonal of the Hessenberg matrix.
 58 *
 59 *     KBOT    (input) INTEGER
 60 *          It is assumed without a check that either
 61 *          KBOT = N or H(KBOT+1,KBOT)=0.  KBOT and KTOP together
 62 *          determine an isolated block along the diagonal of the
 63 *          Hessenberg matrix.
 64 *
 65 *     NW      (input) INTEGER
 66 *          Deflation window size.  1 .LE. NW .LE. (KBOT-KTOP+1).
 67 *
 68 *     H       (input/output) DOUBLE PRECISION array, dimension (LDH,N)
 69 *          On input the initial N-by-N section of H stores the
 70 *          Hessenberg matrix undergoing aggressive early deflation.
 71 *          On output H has been transformed by an orthogonal
 72 *          similarity transformation, perturbed, and the returned
 73 *          to Hessenberg form that (it is to be hoped) has some
 74 *          zero subdiagonal entries.
 75 *
 76 *     LDH     (input) integer
 77 *          Leading dimension of H just as declared in the calling
 78 *          subroutine.  N .LE. LDH
 79 *
 80 *     ILOZ    (input) INTEGER
 81 *     IHIZ    (input) INTEGER
 82 *          Specify the rows of Z to which transformations must be
 83 *          applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
 84 *
 85 *     Z       (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
 86 *          IF WANTZ is .TRUE., then on output, the orthogonal
 87 *          similarity transformation mentioned above has been
 88 *          accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
 89 *          If WANTZ is .FALSE., then Z is unreferenced.
 90 *
 91 *     LDZ     (input) integer
 92 *          The leading dimension of Z just as declared in the
 93 *          calling subroutine.  1 .LE. LDZ.
 94 *
 95 *     NS      (output) integer
 96 *          The number of unconverged (ie approximate) eigenvalues
 97 *          returned in SR and SI that may be used as shifts by the
 98 *          calling subroutine.
 99 *
100 *     ND      (output) integer
101 *          The number of converged eigenvalues uncovered by this
102 *          subroutine.
103 *
104 *     SR      (output) DOUBLE PRECISION array, dimension (KBOT)
105 *     SI      (output) DOUBLE PRECISION array, dimension (KBOT)
106 *          On output, the real and imaginary parts of approximate
107 *          eigenvalues that may be used for shifts are stored in
108 *          SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
109 *          SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
110 *          The real and imaginary parts of converged eigenvalues
111 *          are stored in SR(KBOT-ND+1) through SR(KBOT) and
112 *          SI(KBOT-ND+1) through SI(KBOT), respectively.
113 *
114 *     V       (workspace) DOUBLE PRECISION array, dimension (LDV,NW)
115 *          An NW-by-NW work array.
116 *
117 *     LDV     (input) integer scalar
118 *          The leading dimension of V just as declared in the
119 *          calling subroutine.  NW .LE. LDV
120 *
121 *     NH      (input) integer scalar
122 *          The number of columns of T.  NH.GE.NW.
123 *
124 *     T       (workspace) DOUBLE PRECISION array, dimension (LDT,NW)
125 *
126 *     LDT     (input) integer
127 *          The leading dimension of T just as declared in the
128 *          calling subroutine.  NW .LE. LDT
129 *
130 *     NV      (input) integer
131 *          The number of rows of work array WV available for
132 *          workspace.  NV.GE.NW.
133 *
134 *     WV      (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)
135 *
136 *     LDWV    (input) integer
137 *          The leading dimension of W just as declared in the
138 *          calling subroutine.  NW .LE. LDV
139 *
140 *     WORK    (workspace) DOUBLE PRECISION array, dimension (LWORK)
141 *          On exit, WORK(1) is set to an estimate of the optimal value
142 *          of LWORK for the given values of N, NW, KTOP and KBOT.
143 *
144 *     LWORK   (input) integer
145 *          The dimension of the work array WORK.  LWORK = 2*NW
146 *          suffices, but greater efficiency may result from larger
147 *          values of LWORK.
148 *
149 *          If LWORK = -1, then a workspace query is assumed; DLAQR2
150 *          only estimates the optimal workspace size for the given
151 *          values of N, NW, KTOP and KBOT.  The estimate is returned
152 *          in WORK(1).  No error message related to LWORK is issued
153 *          by XERBLA.  Neither H nor Z are accessed.
154 *
155 *     ================================================================
156 *     Based on contributions by
157 *        Karen Braman and Ralph Byers, Department of Mathematics,
158 *        University of Kansas, USA
159 *
160 *     ================================================================
161 *     .. Parameters ..
162       DOUBLE PRECISION   ZERO, ONE
163       PARAMETER          ( ZERO = 0.0d0, ONE = 1.0d0 )
164 *     ..
165 *     .. Local Scalars ..
166       DOUBLE PRECISION   AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
167      $                   SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
168       INTEGER            I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
169      $                   KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2,
170      $                   LWKOPT
171       LOGICAL            BULGE, SORTED
172 *     ..
173 *     .. External Functions ..
174       DOUBLE PRECISION   DLAMCH
175       EXTERNAL           DLAMCH
176 *     ..
177 *     .. External Subroutines ..
178       EXTERNAL           DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,
179      $                   DLANV2, DLARF, DLARFG, DLASET, DORMHR, DTREXC
180 *     ..
181 *     .. Intrinsic Functions ..
182       INTRINSIC          ABSDBLEINTMAXMINSQRT
183 *     ..
184 *     .. Executable Statements ..
185 *
186 *     ==== Estimate optimal workspace. ====
187 *
188       JW = MIN( NW, KBOT-KTOP+1 )
189       IF( JW.LE.2 ) THEN
190          LWKOPT = 1
191       ELSE
192 *
193 *        ==== Workspace query call to DGEHRD ====
194 *
195          CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
196          LWK1 = INT( WORK( 1 ) )
197 *
198 *        ==== Workspace query call to DORMHR ====
199 *
200          CALL DORMHR( 'R''N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
201      $                WORK, -1, INFO )
202          LWK2 = INT( WORK( 1 ) )
203 *
204 *        ==== Optimal workspace ====
205 *
206          LWKOPT = JW + MAX( LWK1, LWK2 )
207       END IF
208 *
209 *     ==== Quick return in case of workspace query. ====
210 *
211       IF( LWORK.EQ.-1 ) THEN
212          WORK( 1 ) = DBLE( LWKOPT )
213          RETURN
214       END IF
215 *
216 *     ==== Nothing to do ...
217 *     ... for an empty active block ... ====
218       NS = 0
219       ND = 0
220       WORK( 1 ) = ONE
221       IF( KTOP.GT.KBOT )
222      $   RETURN
223 *     ... nor for an empty deflation window. ====
224       IF( NW.LT.1 )
225      $   RETURN
226 *
227 *     ==== Machine constants ====
228 *
229       SAFMIN = DLAMCH( 'SAFE MINIMUM' )
230       SAFMAX = ONE / SAFMIN
231       CALL DLABAD( SAFMIN, SAFMAX )
232       ULP = DLAMCH( 'PRECISION' )
233       SMLNUM = SAFMIN*DBLE( N ) / ULP )
234 *
235 *     ==== Setup deflation window ====
236 *
237       JW = MIN( NW, KBOT-KTOP+1 )
238       KWTOP = KBOT - JW + 1
239       IF( KWTOP.EQ.KTOP ) THEN
240          S = ZERO
241       ELSE
242          S = H( KWTOP, KWTOP-1 )
243       END IF
244 *
245       IF( KBOT.EQ.KWTOP ) THEN
246 *
247 *        ==== 1-by-1 deflation window: not much to do ====
248 *
249          SR( KWTOP ) = H( KWTOP, KWTOP )
250          SI( KWTOP ) = ZERO
251          NS = 1
252          ND = 0
253          IFABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
254      $        THEN
255             NS = 0
256             ND = 1
257             IF( KWTOP.GT.KTOP )
258      $         H( KWTOP, KWTOP-1 ) = ZERO
259          END IF
260          WORK( 1 ) = ONE
261          RETURN
262       END IF
263 *
264 *     ==== Convert to spike-triangular form.  (In case of a
265 *     .    rare QR failure, this routine continues to do
266 *     .    aggressive early deflation using that part of
267 *     .    the deflation window that converged using INFQR
268 *     .    here and there to keep track.) ====
269 *
270       CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
271       CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 21 ), LDT+1 )
272 *
273       CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
274       CALL DLAHQR( .true..true., JW, 1, JW, T, LDT, SR( KWTOP ),
275      $             SI( KWTOP ), 1, JW, V, LDV, INFQR )
276 *
277 *     ==== DTREXC needs a clean margin near the diagonal ====
278 *
279       DO 10 J = 1, JW - 3
280          T( J+2, J ) = ZERO
281          T( J+3, J ) = ZERO
282    10 CONTINUE
283       IF( JW.GT.2 )
284      $   T( JW, JW-2 ) = ZERO
285 *
286 *     ==== Deflation detection loop ====
287 *
288       NS = JW
289       ILST = INFQR + 1
290    20 CONTINUE
291       IF( ILST.LE.NS ) THEN
292          IF( NS.EQ.1 ) THEN
293             BULGE = .FALSE.
294          ELSE
295             BULGE = T( NS, NS-1 ).NE.ZERO
296          END IF
297 *
298 *        ==== Small spike tip test for deflation ====
299 *
300          IF.NOT.BULGE ) THEN
301 *
302 *           ==== Real eigenvalue ====
303 *
304             FOO = ABS( T( NS, NS ) )
305             IF( FOO.EQ.ZERO )
306      $         FOO = ABS( S )
307             IFABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
308 *
309 *              ==== Deflatable ====
310 *
311                NS = NS - 1
312             ELSE
313 *
314 *              ==== Undeflatable.   Move it up out of the way.
315 *              .    (DTREXC can not fail in this case.) ====
316 *
317                IFST = NS
318                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
319      $                      INFO )
320                ILST = ILST + 1
321             END IF
322          ELSE
323 *
324 *           ==== Complex conjugate pair ====
325 *
326             FOO = ABS( T( NS, NS ) ) + SQRTABS( T( NS, NS-1 ) ) )*
327      $            SQRTABS( T( NS-1, NS ) ) )
328             IF( FOO.EQ.ZERO )
329      $         FOO = ABS( S )
330             IFMAXABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
331      $          MAX( SMLNUM, ULP*FOO ) ) THEN
332 *
333 *              ==== Deflatable ====
334 *
335                NS = NS - 2
336             ELSE
337 *
338 *              ==== Undeflatable. Move them up out of the way.
339 *              .    Fortunately, DTREXC does the right thing with
340 *              .    ILST in case of a rare exchange failure. ====
341 *
342                IFST = NS
343                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
344      $                      INFO )
345                ILST = ILST + 2
346             END IF
347          END IF
348 *
349 *        ==== End deflation detection loop ====
350 *
351          GO TO 20
352       END IF
353 *
354 *        ==== Return to Hessenberg form ====
355 *
356       IF( NS.EQ.0 )
357      $   S = ZERO
358 *
359       IF( NS.LT.JW ) THEN
360 *
361 *        ==== sorting diagonal blocks of T improves accuracy for
362 *        .    graded matrices.  Bubble sort deals well with
363 *        .    exchange failures. ====
364 *
365          SORTED = .false.
366          I = NS + 1
367    30    CONTINUE
368          IF( SORTED )
369      $      GO TO 50
370          SORTED = .true.
371 *
372          KEND = I - 1
373          I = INFQR + 1
374          IF( I.EQ.NS ) THEN
375             K = I + 1
376          ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
377             K = I + 1
378          ELSE
379             K = I + 2
380          END IF
381    40    CONTINUE
382          IF( K.LE.KEND ) THEN
383             IF( K.EQ.I+1 ) THEN
384                EVI = ABS( T( I, I ) )
385             ELSE
386                EVI = ABS( T( I, I ) ) + SQRTABS( T( I+1, I ) ) )*
387      $               SQRTABS( T( I, I+1 ) ) )
388             END IF
389 *
390             IF( K.EQ.KEND ) THEN
391                EVK = ABS( T( K, K ) )
392             ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
393                EVK = ABS( T( K, K ) )
394             ELSE
395                EVK = ABS( T( K, K ) ) + SQRTABS( T( K+1, K ) ) )*
396      $               SQRTABS( T( K, K+1 ) ) )
397             END IF
398 *
399             IF( EVI.GE.EVK ) THEN
400                I = K
401             ELSE
402                SORTED = .false.
403                IFST = I
404                ILST = K
405                CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
406      $                      INFO )
407                IF( INFO.EQ.0 ) THEN
408                   I = ILST
409                ELSE
410                   I = K
411                END IF
412             END IF
413             IF( I.EQ.KEND ) THEN
414                K = I + 1
415             ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
416                K = I + 1
417             ELSE
418                K = I + 2
419             END IF
420             GO TO 40
421          END IF
422          GO TO 30
423    50    CONTINUE
424       END IF
425 *
426 *     ==== Restore shift/eigenvalue array from T ====
427 *
428       I = JW
429    60 CONTINUE
430       IF( I.GE.INFQR+1 ) THEN
431          IF( I.EQ.INFQR+1 ) THEN
432             SR( KWTOP+I-1 ) = T( I, I )
433             SI( KWTOP+I-1 ) = ZERO
434             I = I - 1
435          ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
436             SR( KWTOP+I-1 ) = T( I, I )
437             SI( KWTOP+I-1 ) = ZERO
438             I = I - 1
439          ELSE
440             AA = T( I-1, I-1 )
441             CC = T( I, I-1 )
442             BB = T( I-1, I )
443             DD = T( I, I )
444             CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
445      $                   SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
446      $                   SI( KWTOP+I-1 ), CS, SN )
447             I = I - 2
448          END IF
449          GO TO 60
450       END IF
451 *
452       IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
453          IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
454 *
455 *           ==== Reflect spike back into lower triangle ====
456 *
457             CALL DCOPY( NS, V, LDV, WORK, 1 )
458             BETA = WORK( 1 )
459             CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )
460             WORK( 1 ) = ONE
461 *
462             CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 31 ), LDT )
463 *
464             CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
465      $                  WORK( JW+1 ) )
466             CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
467      $                  WORK( JW+1 ) )
468             CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
469      $                  WORK( JW+1 ) )
470 *
471             CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
472      $                   LWORK-JW, INFO )
473          END IF
474 *
475 *        ==== Copy updated reduced window into place ====
476 *
477          IF( KWTOP.GT.1 )
478      $      H( KWTOP, KWTOP-1 ) = S*V( 11 )
479          CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
480          CALL DCOPY( JW-1, T( 21 ), LDT+1, H( KWTOP+1, KWTOP ),
481      $               LDH+1 )
482 *
483 *        ==== Accumulate orthogonal matrix in order update
484 *        .    H and Z, if requested.  ====
485 *
486          IF( NS.GT.1 .AND. S.NE.ZERO )
487      $      CALL DORMHR( 'R''N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
488      $                   WORK( JW+1 ), LWORK-JW, INFO )
489 *
490 *        ==== Update vertical slab in H ====
491 *
492          IF( WANTT ) THEN
493             LTOP = 1
494          ELSE
495             LTOP = KTOP
496          END IF
497          DO 70 KROW = LTOP, KWTOP - 1, NV
498             KLN = MIN( NV, KWTOP-KROW )
499             CALL DGEMM( 'N''N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
500      $                  LDH, V, LDV, ZERO, WV, LDWV )
501             CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
502    70    CONTINUE
503 *
504 *        ==== Update horizontal slab in H ====
505 *
506          IF( WANTT ) THEN
507             DO 80 KCOL = KBOT + 1, N, NH
508                KLN = MIN( NH, N-KCOL+1 )
509                CALL DGEMM( 'C''N', JW, KLN, JW, ONE, V, LDV,
510      $                     H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
511                CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
512      $                      LDH )
513    80       CONTINUE
514          END IF
515 *
516 *        ==== Update vertical slab in Z ====
517 *
518          IF( WANTZ ) THEN
519             DO 90 KROW = ILOZ, IHIZ, NV
520                KLN = MIN( NV, IHIZ-KROW+1 )
521                CALL DGEMM( 'N''N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
522      $                     LDZ, V, LDV, ZERO, WV, LDWV )
523                CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
524      $                      LDZ )
525    90       CONTINUE
526          END IF
527       END IF
528 *
529 *     ==== Return the number of deflations ... ====
530 *
531       ND = JW - NS
532 *
533 *     ==== ... and the number of shifts. (Subtracting
534 *     .    INFQR from the spike length takes care
535 *     .    of the case of a rare QR failure while
536 *     .    calculating eigenvalues of the deflation
537 *     .    window.)  ====
538 *
539       NS = NS - INFQR
540 *
541 *      ==== Return optimal workspace. ====
542 *
543       WORK( 1 ) = DBLE( LWKOPT )
544 *
545 *     ==== End of DLAQR2 ====
546 *
547       END