1 SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
2 $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
3 $ LDT, NV, WV, LDWV, WORK, LWORK )
4 *
5 * -- LAPACK auxiliary routine (version 3.2.2) --
6 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
7 * -- June 2010 --
8 *
9 * .. Scalar Arguments ..
10 INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
11 $ LDZ, LWORK, N, ND, NH, NS, NV, NW
12 LOGICAL WANTT, WANTZ
13 * ..
14 * .. Array Arguments ..
15 DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
16 $ V( LDV, * ), WORK( * ), WV( LDWV, * ),
17 $ Z( LDZ, * )
18 * ..
19 *
20 * This subroutine is identical to DLAQR3 except that it avoids
21 * recursion by calling DLAHQR instead of DLAQR4.
22 *
23 *
24 * ******************************************************************
25 * Aggressive early deflation:
26 *
27 * This subroutine accepts as input an upper Hessenberg matrix
28 * H and performs an orthogonal similarity transformation
29 * designed to detect and deflate fully converged eigenvalues from
30 * a trailing principal submatrix. On output H has been over-
31 * written by a new Hessenberg matrix that is a perturbation of
32 * an orthogonal similarity transformation of H. It is to be
33 * hoped that the final version of H has many zero subdiagonal
34 * entries.
35 *
36 * ******************************************************************
37 * WANTT (input) LOGICAL
38 * If .TRUE., then the Hessenberg matrix H is fully updated
39 * so that the quasi-triangular Schur factor may be
40 * computed (in cooperation with the calling subroutine).
41 * If .FALSE., then only enough of H is updated to preserve
42 * the eigenvalues.
43 *
44 * WANTZ (input) LOGICAL
45 * If .TRUE., then the orthogonal matrix Z is updated so
46 * so that the orthogonal Schur factor may be computed
47 * (in cooperation with the calling subroutine).
48 * If .FALSE., then Z is not referenced.
49 *
50 * N (input) INTEGER
51 * The order of the matrix H and (if WANTZ is .TRUE.) the
52 * order of the orthogonal matrix Z.
53 *
54 * KTOP (input) INTEGER
55 * It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
56 * KBOT and KTOP together determine an isolated block
57 * along the diagonal of the Hessenberg matrix.
58 *
59 * KBOT (input) INTEGER
60 * It is assumed without a check that either
61 * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
62 * determine an isolated block along the diagonal of the
63 * Hessenberg matrix.
64 *
65 * NW (input) INTEGER
66 * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
67 *
68 * H (input/output) DOUBLE PRECISION array, dimension (LDH,N)
69 * On input the initial N-by-N section of H stores the
70 * Hessenberg matrix undergoing aggressive early deflation.
71 * On output H has been transformed by an orthogonal
72 * similarity transformation, perturbed, and the returned
73 * to Hessenberg form that (it is to be hoped) has some
74 * zero subdiagonal entries.
75 *
76 * LDH (input) integer
77 * Leading dimension of H just as declared in the calling
78 * subroutine. N .LE. LDH
79 *
80 * ILOZ (input) INTEGER
81 * IHIZ (input) INTEGER
82 * Specify the rows of Z to which transformations must be
83 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
84 *
85 * Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
86 * IF WANTZ is .TRUE., then on output, the orthogonal
87 * similarity transformation mentioned above has been
88 * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
89 * If WANTZ is .FALSE., then Z is unreferenced.
90 *
91 * LDZ (input) integer
92 * The leading dimension of Z just as declared in the
93 * calling subroutine. 1 .LE. LDZ.
94 *
95 * NS (output) integer
96 * The number of unconverged (ie approximate) eigenvalues
97 * returned in SR and SI that may be used as shifts by the
98 * calling subroutine.
99 *
100 * ND (output) integer
101 * The number of converged eigenvalues uncovered by this
102 * subroutine.
103 *
104 * SR (output) DOUBLE PRECISION array, dimension (KBOT)
105 * SI (output) DOUBLE PRECISION array, dimension (KBOT)
106 * On output, the real and imaginary parts of approximate
107 * eigenvalues that may be used for shifts are stored in
108 * SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
109 * SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
110 * The real and imaginary parts of converged eigenvalues
111 * are stored in SR(KBOT-ND+1) through SR(KBOT) and
112 * SI(KBOT-ND+1) through SI(KBOT), respectively.
113 *
114 * V (workspace) DOUBLE PRECISION array, dimension (LDV,NW)
115 * An NW-by-NW work array.
116 *
117 * LDV (input) integer scalar
118 * The leading dimension of V just as declared in the
119 * calling subroutine. NW .LE. LDV
120 *
121 * NH (input) integer scalar
122 * The number of columns of T. NH.GE.NW.
123 *
124 * T (workspace) DOUBLE PRECISION array, dimension (LDT,NW)
125 *
126 * LDT (input) integer
127 * The leading dimension of T just as declared in the
128 * calling subroutine. NW .LE. LDT
129 *
130 * NV (input) integer
131 * The number of rows of work array WV available for
132 * workspace. NV.GE.NW.
133 *
134 * WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)
135 *
136 * LDWV (input) integer
137 * The leading dimension of W just as declared in the
138 * calling subroutine. NW .LE. LDV
139 *
140 * WORK (workspace) DOUBLE PRECISION array, dimension (LWORK)
141 * On exit, WORK(1) is set to an estimate of the optimal value
142 * of LWORK for the given values of N, NW, KTOP and KBOT.
143 *
144 * LWORK (input) integer
145 * The dimension of the work array WORK. LWORK = 2*NW
146 * suffices, but greater efficiency may result from larger
147 * values of LWORK.
148 *
149 * If LWORK = -1, then a workspace query is assumed; DLAQR2
150 * only estimates the optimal workspace size for the given
151 * values of N, NW, KTOP and KBOT. The estimate is returned
152 * in WORK(1). No error message related to LWORK is issued
153 * by XERBLA. Neither H nor Z are accessed.
154 *
155 * ================================================================
156 * Based on contributions by
157 * Karen Braman and Ralph Byers, Department of Mathematics,
158 * University of Kansas, USA
159 *
160 * ================================================================
161 * .. Parameters ..
162 DOUBLE PRECISION ZERO, ONE
163 PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
164 * ..
165 * .. Local Scalars ..
166 DOUBLE PRECISION AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
167 $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
168 INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
169 $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2,
170 $ LWKOPT
171 LOGICAL BULGE, SORTED
172 * ..
173 * .. External Functions ..
174 DOUBLE PRECISION DLAMCH
175 EXTERNAL DLAMCH
176 * ..
177 * .. External Subroutines ..
178 EXTERNAL DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,
179 $ DLANV2, DLARF, DLARFG, DLASET, DORMHR, DTREXC
180 * ..
181 * .. Intrinsic Functions ..
182 INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT
183 * ..
184 * .. Executable Statements ..
185 *
186 * ==== Estimate optimal workspace. ====
187 *
188 JW = MIN( NW, KBOT-KTOP+1 )
189 IF( JW.LE.2 ) THEN
190 LWKOPT = 1
191 ELSE
192 *
193 * ==== Workspace query call to DGEHRD ====
194 *
195 CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
196 LWK1 = INT( WORK( 1 ) )
197 *
198 * ==== Workspace query call to DORMHR ====
199 *
200 CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
201 $ WORK, -1, INFO )
202 LWK2 = INT( WORK( 1 ) )
203 *
204 * ==== Optimal workspace ====
205 *
206 LWKOPT = JW + MAX( LWK1, LWK2 )
207 END IF
208 *
209 * ==== Quick return in case of workspace query. ====
210 *
211 IF( LWORK.EQ.-1 ) THEN
212 WORK( 1 ) = DBLE( LWKOPT )
213 RETURN
214 END IF
215 *
216 * ==== Nothing to do ...
217 * ... for an empty active block ... ====
218 NS = 0
219 ND = 0
220 WORK( 1 ) = ONE
221 IF( KTOP.GT.KBOT )
222 $ RETURN
223 * ... nor for an empty deflation window. ====
224 IF( NW.LT.1 )
225 $ RETURN
226 *
227 * ==== Machine constants ====
228 *
229 SAFMIN = DLAMCH( 'SAFE MINIMUM' )
230 SAFMAX = ONE / SAFMIN
231 CALL DLABAD( SAFMIN, SAFMAX )
232 ULP = DLAMCH( 'PRECISION' )
233 SMLNUM = SAFMIN*( DBLE( N ) / ULP )
234 *
235 * ==== Setup deflation window ====
236 *
237 JW = MIN( NW, KBOT-KTOP+1 )
238 KWTOP = KBOT - JW + 1
239 IF( KWTOP.EQ.KTOP ) THEN
240 S = ZERO
241 ELSE
242 S = H( KWTOP, KWTOP-1 )
243 END IF
244 *
245 IF( KBOT.EQ.KWTOP ) THEN
246 *
247 * ==== 1-by-1 deflation window: not much to do ====
248 *
249 SR( KWTOP ) = H( KWTOP, KWTOP )
250 SI( KWTOP ) = ZERO
251 NS = 1
252 ND = 0
253 IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
254 $ THEN
255 NS = 0
256 ND = 1
257 IF( KWTOP.GT.KTOP )
258 $ H( KWTOP, KWTOP-1 ) = ZERO
259 END IF
260 WORK( 1 ) = ONE
261 RETURN
262 END IF
263 *
264 * ==== Convert to spike-triangular form. (In case of a
265 * . rare QR failure, this routine continues to do
266 * . aggressive early deflation using that part of
267 * . the deflation window that converged using INFQR
268 * . here and there to keep track.) ====
269 *
270 CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
271 CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
272 *
273 CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
274 CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
275 $ SI( KWTOP ), 1, JW, V, LDV, INFQR )
276 *
277 * ==== DTREXC needs a clean margin near the diagonal ====
278 *
279 DO 10 J = 1, JW - 3
280 T( J+2, J ) = ZERO
281 T( J+3, J ) = ZERO
282 10 CONTINUE
283 IF( JW.GT.2 )
284 $ T( JW, JW-2 ) = ZERO
285 *
286 * ==== Deflation detection loop ====
287 *
288 NS = JW
289 ILST = INFQR + 1
290 20 CONTINUE
291 IF( ILST.LE.NS ) THEN
292 IF( NS.EQ.1 ) THEN
293 BULGE = .FALSE.
294 ELSE
295 BULGE = T( NS, NS-1 ).NE.ZERO
296 END IF
297 *
298 * ==== Small spike tip test for deflation ====
299 *
300 IF( .NOT.BULGE ) THEN
301 *
302 * ==== Real eigenvalue ====
303 *
304 FOO = ABS( T( NS, NS ) )
305 IF( FOO.EQ.ZERO )
306 $ FOO = ABS( S )
307 IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
308 *
309 * ==== Deflatable ====
310 *
311 NS = NS - 1
312 ELSE
313 *
314 * ==== Undeflatable. Move it up out of the way.
315 * . (DTREXC can not fail in this case.) ====
316 *
317 IFST = NS
318 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
319 $ INFO )
320 ILST = ILST + 1
321 END IF
322 ELSE
323 *
324 * ==== Complex conjugate pair ====
325 *
326 FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*
327 $ SQRT( ABS( T( NS-1, NS ) ) )
328 IF( FOO.EQ.ZERO )
329 $ FOO = ABS( S )
330 IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
331 $ MAX( SMLNUM, ULP*FOO ) ) THEN
332 *
333 * ==== Deflatable ====
334 *
335 NS = NS - 2
336 ELSE
337 *
338 * ==== Undeflatable. Move them up out of the way.
339 * . Fortunately, DTREXC does the right thing with
340 * . ILST in case of a rare exchange failure. ====
341 *
342 IFST = NS
343 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
344 $ INFO )
345 ILST = ILST + 2
346 END IF
347 END IF
348 *
349 * ==== End deflation detection loop ====
350 *
351 GO TO 20
352 END IF
353 *
354 * ==== Return to Hessenberg form ====
355 *
356 IF( NS.EQ.0 )
357 $ S = ZERO
358 *
359 IF( NS.LT.JW ) THEN
360 *
361 * ==== sorting diagonal blocks of T improves accuracy for
362 * . graded matrices. Bubble sort deals well with
363 * . exchange failures. ====
364 *
365 SORTED = .false.
366 I = NS + 1
367 30 CONTINUE
368 IF( SORTED )
369 $ GO TO 50
370 SORTED = .true.
371 *
372 KEND = I - 1
373 I = INFQR + 1
374 IF( I.EQ.NS ) THEN
375 K = I + 1
376 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
377 K = I + 1
378 ELSE
379 K = I + 2
380 END IF
381 40 CONTINUE
382 IF( K.LE.KEND ) THEN
383 IF( K.EQ.I+1 ) THEN
384 EVI = ABS( T( I, I ) )
385 ELSE
386 EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*
387 $ SQRT( ABS( T( I, I+1 ) ) )
388 END IF
389 *
390 IF( K.EQ.KEND ) THEN
391 EVK = ABS( T( K, K ) )
392 ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
393 EVK = ABS( T( K, K ) )
394 ELSE
395 EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*
396 $ SQRT( ABS( T( K, K+1 ) ) )
397 END IF
398 *
399 IF( EVI.GE.EVK ) THEN
400 I = K
401 ELSE
402 SORTED = .false.
403 IFST = I
404 ILST = K
405 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
406 $ INFO )
407 IF( INFO.EQ.0 ) THEN
408 I = ILST
409 ELSE
410 I = K
411 END IF
412 END IF
413 IF( I.EQ.KEND ) THEN
414 K = I + 1
415 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
416 K = I + 1
417 ELSE
418 K = I + 2
419 END IF
420 GO TO 40
421 END IF
422 GO TO 30
423 50 CONTINUE
424 END IF
425 *
426 * ==== Restore shift/eigenvalue array from T ====
427 *
428 I = JW
429 60 CONTINUE
430 IF( I.GE.INFQR+1 ) THEN
431 IF( I.EQ.INFQR+1 ) THEN
432 SR( KWTOP+I-1 ) = T( I, I )
433 SI( KWTOP+I-1 ) = ZERO
434 I = I - 1
435 ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
436 SR( KWTOP+I-1 ) = T( I, I )
437 SI( KWTOP+I-1 ) = ZERO
438 I = I - 1
439 ELSE
440 AA = T( I-1, I-1 )
441 CC = T( I, I-1 )
442 BB = T( I-1, I )
443 DD = T( I, I )
444 CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
445 $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
446 $ SI( KWTOP+I-1 ), CS, SN )
447 I = I - 2
448 END IF
449 GO TO 60
450 END IF
451 *
452 IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
453 IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
454 *
455 * ==== Reflect spike back into lower triangle ====
456 *
457 CALL DCOPY( NS, V, LDV, WORK, 1 )
458 BETA = WORK( 1 )
459 CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )
460 WORK( 1 ) = ONE
461 *
462 CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
463 *
464 CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
465 $ WORK( JW+1 ) )
466 CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
467 $ WORK( JW+1 ) )
468 CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
469 $ WORK( JW+1 ) )
470 *
471 CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
472 $ LWORK-JW, INFO )
473 END IF
474 *
475 * ==== Copy updated reduced window into place ====
476 *
477 IF( KWTOP.GT.1 )
478 $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )
479 CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
480 CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
481 $ LDH+1 )
482 *
483 * ==== Accumulate orthogonal matrix in order update
484 * . H and Z, if requested. ====
485 *
486 IF( NS.GT.1 .AND. S.NE.ZERO )
487 $ CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
488 $ WORK( JW+1 ), LWORK-JW, INFO )
489 *
490 * ==== Update vertical slab in H ====
491 *
492 IF( WANTT ) THEN
493 LTOP = 1
494 ELSE
495 LTOP = KTOP
496 END IF
497 DO 70 KROW = LTOP, KWTOP - 1, NV
498 KLN = MIN( NV, KWTOP-KROW )
499 CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
500 $ LDH, V, LDV, ZERO, WV, LDWV )
501 CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
502 70 CONTINUE
503 *
504 * ==== Update horizontal slab in H ====
505 *
506 IF( WANTT ) THEN
507 DO 80 KCOL = KBOT + 1, N, NH
508 KLN = MIN( NH, N-KCOL+1 )
509 CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
510 $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
511 CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
512 $ LDH )
513 80 CONTINUE
514 END IF
515 *
516 * ==== Update vertical slab in Z ====
517 *
518 IF( WANTZ ) THEN
519 DO 90 KROW = ILOZ, IHIZ, NV
520 KLN = MIN( NV, IHIZ-KROW+1 )
521 CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
522 $ LDZ, V, LDV, ZERO, WV, LDWV )
523 CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
524 $ LDZ )
525 90 CONTINUE
526 END IF
527 END IF
528 *
529 * ==== Return the number of deflations ... ====
530 *
531 ND = JW - NS
532 *
533 * ==== ... and the number of shifts. (Subtracting
534 * . INFQR from the spike length takes care
535 * . of the case of a rare QR failure while
536 * . calculating eigenvalues of the deflation
537 * . window.) ====
538 *
539 NS = NS - INFQR
540 *
541 * ==== Return optimal workspace. ====
542 *
543 WORK( 1 ) = DBLE( LWKOPT )
544 *
545 * ==== End of DLAQR2 ====
546 *
547 END
2 $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
3 $ LDT, NV, WV, LDWV, WORK, LWORK )
4 *
5 * -- LAPACK auxiliary routine (version 3.2.2) --
6 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
7 * -- June 2010 --
8 *
9 * .. Scalar Arguments ..
10 INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
11 $ LDZ, LWORK, N, ND, NH, NS, NV, NW
12 LOGICAL WANTT, WANTZ
13 * ..
14 * .. Array Arguments ..
15 DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
16 $ V( LDV, * ), WORK( * ), WV( LDWV, * ),
17 $ Z( LDZ, * )
18 * ..
19 *
20 * This subroutine is identical to DLAQR3 except that it avoids
21 * recursion by calling DLAHQR instead of DLAQR4.
22 *
23 *
24 * ******************************************************************
25 * Aggressive early deflation:
26 *
27 * This subroutine accepts as input an upper Hessenberg matrix
28 * H and performs an orthogonal similarity transformation
29 * designed to detect and deflate fully converged eigenvalues from
30 * a trailing principal submatrix. On output H has been over-
31 * written by a new Hessenberg matrix that is a perturbation of
32 * an orthogonal similarity transformation of H. It is to be
33 * hoped that the final version of H has many zero subdiagonal
34 * entries.
35 *
36 * ******************************************************************
37 * WANTT (input) LOGICAL
38 * If .TRUE., then the Hessenberg matrix H is fully updated
39 * so that the quasi-triangular Schur factor may be
40 * computed (in cooperation with the calling subroutine).
41 * If .FALSE., then only enough of H is updated to preserve
42 * the eigenvalues.
43 *
44 * WANTZ (input) LOGICAL
45 * If .TRUE., then the orthogonal matrix Z is updated so
46 * so that the orthogonal Schur factor may be computed
47 * (in cooperation with the calling subroutine).
48 * If .FALSE., then Z is not referenced.
49 *
50 * N (input) INTEGER
51 * The order of the matrix H and (if WANTZ is .TRUE.) the
52 * order of the orthogonal matrix Z.
53 *
54 * KTOP (input) INTEGER
55 * It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
56 * KBOT and KTOP together determine an isolated block
57 * along the diagonal of the Hessenberg matrix.
58 *
59 * KBOT (input) INTEGER
60 * It is assumed without a check that either
61 * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
62 * determine an isolated block along the diagonal of the
63 * Hessenberg matrix.
64 *
65 * NW (input) INTEGER
66 * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
67 *
68 * H (input/output) DOUBLE PRECISION array, dimension (LDH,N)
69 * On input the initial N-by-N section of H stores the
70 * Hessenberg matrix undergoing aggressive early deflation.
71 * On output H has been transformed by an orthogonal
72 * similarity transformation, perturbed, and the returned
73 * to Hessenberg form that (it is to be hoped) has some
74 * zero subdiagonal entries.
75 *
76 * LDH (input) integer
77 * Leading dimension of H just as declared in the calling
78 * subroutine. N .LE. LDH
79 *
80 * ILOZ (input) INTEGER
81 * IHIZ (input) INTEGER
82 * Specify the rows of Z to which transformations must be
83 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
84 *
85 * Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
86 * IF WANTZ is .TRUE., then on output, the orthogonal
87 * similarity transformation mentioned above has been
88 * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
89 * If WANTZ is .FALSE., then Z is unreferenced.
90 *
91 * LDZ (input) integer
92 * The leading dimension of Z just as declared in the
93 * calling subroutine. 1 .LE. LDZ.
94 *
95 * NS (output) integer
96 * The number of unconverged (ie approximate) eigenvalues
97 * returned in SR and SI that may be used as shifts by the
98 * calling subroutine.
99 *
100 * ND (output) integer
101 * The number of converged eigenvalues uncovered by this
102 * subroutine.
103 *
104 * SR (output) DOUBLE PRECISION array, dimension (KBOT)
105 * SI (output) DOUBLE PRECISION array, dimension (KBOT)
106 * On output, the real and imaginary parts of approximate
107 * eigenvalues that may be used for shifts are stored in
108 * SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
109 * SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
110 * The real and imaginary parts of converged eigenvalues
111 * are stored in SR(KBOT-ND+1) through SR(KBOT) and
112 * SI(KBOT-ND+1) through SI(KBOT), respectively.
113 *
114 * V (workspace) DOUBLE PRECISION array, dimension (LDV,NW)
115 * An NW-by-NW work array.
116 *
117 * LDV (input) integer scalar
118 * The leading dimension of V just as declared in the
119 * calling subroutine. NW .LE. LDV
120 *
121 * NH (input) integer scalar
122 * The number of columns of T. NH.GE.NW.
123 *
124 * T (workspace) DOUBLE PRECISION array, dimension (LDT,NW)
125 *
126 * LDT (input) integer
127 * The leading dimension of T just as declared in the
128 * calling subroutine. NW .LE. LDT
129 *
130 * NV (input) integer
131 * The number of rows of work array WV available for
132 * workspace. NV.GE.NW.
133 *
134 * WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)
135 *
136 * LDWV (input) integer
137 * The leading dimension of W just as declared in the
138 * calling subroutine. NW .LE. LDV
139 *
140 * WORK (workspace) DOUBLE PRECISION array, dimension (LWORK)
141 * On exit, WORK(1) is set to an estimate of the optimal value
142 * of LWORK for the given values of N, NW, KTOP and KBOT.
143 *
144 * LWORK (input) integer
145 * The dimension of the work array WORK. LWORK = 2*NW
146 * suffices, but greater efficiency may result from larger
147 * values of LWORK.
148 *
149 * If LWORK = -1, then a workspace query is assumed; DLAQR2
150 * only estimates the optimal workspace size for the given
151 * values of N, NW, KTOP and KBOT. The estimate is returned
152 * in WORK(1). No error message related to LWORK is issued
153 * by XERBLA. Neither H nor Z are accessed.
154 *
155 * ================================================================
156 * Based on contributions by
157 * Karen Braman and Ralph Byers, Department of Mathematics,
158 * University of Kansas, USA
159 *
160 * ================================================================
161 * .. Parameters ..
162 DOUBLE PRECISION ZERO, ONE
163 PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
164 * ..
165 * .. Local Scalars ..
166 DOUBLE PRECISION AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
167 $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
168 INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
169 $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2,
170 $ LWKOPT
171 LOGICAL BULGE, SORTED
172 * ..
173 * .. External Functions ..
174 DOUBLE PRECISION DLAMCH
175 EXTERNAL DLAMCH
176 * ..
177 * .. External Subroutines ..
178 EXTERNAL DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,
179 $ DLANV2, DLARF, DLARFG, DLASET, DORMHR, DTREXC
180 * ..
181 * .. Intrinsic Functions ..
182 INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT
183 * ..
184 * .. Executable Statements ..
185 *
186 * ==== Estimate optimal workspace. ====
187 *
188 JW = MIN( NW, KBOT-KTOP+1 )
189 IF( JW.LE.2 ) THEN
190 LWKOPT = 1
191 ELSE
192 *
193 * ==== Workspace query call to DGEHRD ====
194 *
195 CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
196 LWK1 = INT( WORK( 1 ) )
197 *
198 * ==== Workspace query call to DORMHR ====
199 *
200 CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
201 $ WORK, -1, INFO )
202 LWK2 = INT( WORK( 1 ) )
203 *
204 * ==== Optimal workspace ====
205 *
206 LWKOPT = JW + MAX( LWK1, LWK2 )
207 END IF
208 *
209 * ==== Quick return in case of workspace query. ====
210 *
211 IF( LWORK.EQ.-1 ) THEN
212 WORK( 1 ) = DBLE( LWKOPT )
213 RETURN
214 END IF
215 *
216 * ==== Nothing to do ...
217 * ... for an empty active block ... ====
218 NS = 0
219 ND = 0
220 WORK( 1 ) = ONE
221 IF( KTOP.GT.KBOT )
222 $ RETURN
223 * ... nor for an empty deflation window. ====
224 IF( NW.LT.1 )
225 $ RETURN
226 *
227 * ==== Machine constants ====
228 *
229 SAFMIN = DLAMCH( 'SAFE MINIMUM' )
230 SAFMAX = ONE / SAFMIN
231 CALL DLABAD( SAFMIN, SAFMAX )
232 ULP = DLAMCH( 'PRECISION' )
233 SMLNUM = SAFMIN*( DBLE( N ) / ULP )
234 *
235 * ==== Setup deflation window ====
236 *
237 JW = MIN( NW, KBOT-KTOP+1 )
238 KWTOP = KBOT - JW + 1
239 IF( KWTOP.EQ.KTOP ) THEN
240 S = ZERO
241 ELSE
242 S = H( KWTOP, KWTOP-1 )
243 END IF
244 *
245 IF( KBOT.EQ.KWTOP ) THEN
246 *
247 * ==== 1-by-1 deflation window: not much to do ====
248 *
249 SR( KWTOP ) = H( KWTOP, KWTOP )
250 SI( KWTOP ) = ZERO
251 NS = 1
252 ND = 0
253 IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
254 $ THEN
255 NS = 0
256 ND = 1
257 IF( KWTOP.GT.KTOP )
258 $ H( KWTOP, KWTOP-1 ) = ZERO
259 END IF
260 WORK( 1 ) = ONE
261 RETURN
262 END IF
263 *
264 * ==== Convert to spike-triangular form. (In case of a
265 * . rare QR failure, this routine continues to do
266 * . aggressive early deflation using that part of
267 * . the deflation window that converged using INFQR
268 * . here and there to keep track.) ====
269 *
270 CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
271 CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
272 *
273 CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
274 CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
275 $ SI( KWTOP ), 1, JW, V, LDV, INFQR )
276 *
277 * ==== DTREXC needs a clean margin near the diagonal ====
278 *
279 DO 10 J = 1, JW - 3
280 T( J+2, J ) = ZERO
281 T( J+3, J ) = ZERO
282 10 CONTINUE
283 IF( JW.GT.2 )
284 $ T( JW, JW-2 ) = ZERO
285 *
286 * ==== Deflation detection loop ====
287 *
288 NS = JW
289 ILST = INFQR + 1
290 20 CONTINUE
291 IF( ILST.LE.NS ) THEN
292 IF( NS.EQ.1 ) THEN
293 BULGE = .FALSE.
294 ELSE
295 BULGE = T( NS, NS-1 ).NE.ZERO
296 END IF
297 *
298 * ==== Small spike tip test for deflation ====
299 *
300 IF( .NOT.BULGE ) THEN
301 *
302 * ==== Real eigenvalue ====
303 *
304 FOO = ABS( T( NS, NS ) )
305 IF( FOO.EQ.ZERO )
306 $ FOO = ABS( S )
307 IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
308 *
309 * ==== Deflatable ====
310 *
311 NS = NS - 1
312 ELSE
313 *
314 * ==== Undeflatable. Move it up out of the way.
315 * . (DTREXC can not fail in this case.) ====
316 *
317 IFST = NS
318 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
319 $ INFO )
320 ILST = ILST + 1
321 END IF
322 ELSE
323 *
324 * ==== Complex conjugate pair ====
325 *
326 FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*
327 $ SQRT( ABS( T( NS-1, NS ) ) )
328 IF( FOO.EQ.ZERO )
329 $ FOO = ABS( S )
330 IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
331 $ MAX( SMLNUM, ULP*FOO ) ) THEN
332 *
333 * ==== Deflatable ====
334 *
335 NS = NS - 2
336 ELSE
337 *
338 * ==== Undeflatable. Move them up out of the way.
339 * . Fortunately, DTREXC does the right thing with
340 * . ILST in case of a rare exchange failure. ====
341 *
342 IFST = NS
343 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
344 $ INFO )
345 ILST = ILST + 2
346 END IF
347 END IF
348 *
349 * ==== End deflation detection loop ====
350 *
351 GO TO 20
352 END IF
353 *
354 * ==== Return to Hessenberg form ====
355 *
356 IF( NS.EQ.0 )
357 $ S = ZERO
358 *
359 IF( NS.LT.JW ) THEN
360 *
361 * ==== sorting diagonal blocks of T improves accuracy for
362 * . graded matrices. Bubble sort deals well with
363 * . exchange failures. ====
364 *
365 SORTED = .false.
366 I = NS + 1
367 30 CONTINUE
368 IF( SORTED )
369 $ GO TO 50
370 SORTED = .true.
371 *
372 KEND = I - 1
373 I = INFQR + 1
374 IF( I.EQ.NS ) THEN
375 K = I + 1
376 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
377 K = I + 1
378 ELSE
379 K = I + 2
380 END IF
381 40 CONTINUE
382 IF( K.LE.KEND ) THEN
383 IF( K.EQ.I+1 ) THEN
384 EVI = ABS( T( I, I ) )
385 ELSE
386 EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*
387 $ SQRT( ABS( T( I, I+1 ) ) )
388 END IF
389 *
390 IF( K.EQ.KEND ) THEN
391 EVK = ABS( T( K, K ) )
392 ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
393 EVK = ABS( T( K, K ) )
394 ELSE
395 EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*
396 $ SQRT( ABS( T( K, K+1 ) ) )
397 END IF
398 *
399 IF( EVI.GE.EVK ) THEN
400 I = K
401 ELSE
402 SORTED = .false.
403 IFST = I
404 ILST = K
405 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
406 $ INFO )
407 IF( INFO.EQ.0 ) THEN
408 I = ILST
409 ELSE
410 I = K
411 END IF
412 END IF
413 IF( I.EQ.KEND ) THEN
414 K = I + 1
415 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
416 K = I + 1
417 ELSE
418 K = I + 2
419 END IF
420 GO TO 40
421 END IF
422 GO TO 30
423 50 CONTINUE
424 END IF
425 *
426 * ==== Restore shift/eigenvalue array from T ====
427 *
428 I = JW
429 60 CONTINUE
430 IF( I.GE.INFQR+1 ) THEN
431 IF( I.EQ.INFQR+1 ) THEN
432 SR( KWTOP+I-1 ) = T( I, I )
433 SI( KWTOP+I-1 ) = ZERO
434 I = I - 1
435 ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
436 SR( KWTOP+I-1 ) = T( I, I )
437 SI( KWTOP+I-1 ) = ZERO
438 I = I - 1
439 ELSE
440 AA = T( I-1, I-1 )
441 CC = T( I, I-1 )
442 BB = T( I-1, I )
443 DD = T( I, I )
444 CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
445 $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
446 $ SI( KWTOP+I-1 ), CS, SN )
447 I = I - 2
448 END IF
449 GO TO 60
450 END IF
451 *
452 IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
453 IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
454 *
455 * ==== Reflect spike back into lower triangle ====
456 *
457 CALL DCOPY( NS, V, LDV, WORK, 1 )
458 BETA = WORK( 1 )
459 CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )
460 WORK( 1 ) = ONE
461 *
462 CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
463 *
464 CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
465 $ WORK( JW+1 ) )
466 CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
467 $ WORK( JW+1 ) )
468 CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
469 $ WORK( JW+1 ) )
470 *
471 CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
472 $ LWORK-JW, INFO )
473 END IF
474 *
475 * ==== Copy updated reduced window into place ====
476 *
477 IF( KWTOP.GT.1 )
478 $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )
479 CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
480 CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
481 $ LDH+1 )
482 *
483 * ==== Accumulate orthogonal matrix in order update
484 * . H and Z, if requested. ====
485 *
486 IF( NS.GT.1 .AND. S.NE.ZERO )
487 $ CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
488 $ WORK( JW+1 ), LWORK-JW, INFO )
489 *
490 * ==== Update vertical slab in H ====
491 *
492 IF( WANTT ) THEN
493 LTOP = 1
494 ELSE
495 LTOP = KTOP
496 END IF
497 DO 70 KROW = LTOP, KWTOP - 1, NV
498 KLN = MIN( NV, KWTOP-KROW )
499 CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
500 $ LDH, V, LDV, ZERO, WV, LDWV )
501 CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
502 70 CONTINUE
503 *
504 * ==== Update horizontal slab in H ====
505 *
506 IF( WANTT ) THEN
507 DO 80 KCOL = KBOT + 1, N, NH
508 KLN = MIN( NH, N-KCOL+1 )
509 CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
510 $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
511 CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
512 $ LDH )
513 80 CONTINUE
514 END IF
515 *
516 * ==== Update vertical slab in Z ====
517 *
518 IF( WANTZ ) THEN
519 DO 90 KROW = ILOZ, IHIZ, NV
520 KLN = MIN( NV, IHIZ-KROW+1 )
521 CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
522 $ LDZ, V, LDV, ZERO, WV, LDWV )
523 CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
524 $ LDZ )
525 90 CONTINUE
526 END IF
527 END IF
528 *
529 * ==== Return the number of deflations ... ====
530 *
531 ND = JW - NS
532 *
533 * ==== ... and the number of shifts. (Subtracting
534 * . INFQR from the spike length takes care
535 * . of the case of a rare QR failure while
536 * . calculating eigenvalues of the deflation
537 * . window.) ====
538 *
539 NS = NS - INFQR
540 *
541 * ==== Return optimal workspace. ====
542 *
543 WORK( 1 ) = DBLE( LWKOPT )
544 *
545 * ==== End of DLAQR2 ====
546 *
547 END