1 SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
2 $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
3 $ LDT, NV, WV, LDWV, WORK, LWORK )
4 *
5 * -- LAPACK auxiliary routine (version 3.2.2) --
6 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
7 * -- June 2010 --
8 *
9 * .. Scalar Arguments ..
10 INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
11 $ LDZ, LWORK, N, ND, NH, NS, NV, NW
12 LOGICAL WANTT, WANTZ
13 * ..
14 * .. Array Arguments ..
15 DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
16 $ V( LDV, * ), WORK( * ), WV( LDWV, * ),
17 $ Z( LDZ, * )
18 * ..
19 *
20 * ******************************************************************
21 * Aggressive early deflation:
22 *
23 * This subroutine accepts as input an upper Hessenberg matrix
24 * H and performs an orthogonal similarity transformation
25 * designed to detect and deflate fully converged eigenvalues from
26 * a trailing principal submatrix. On output H has been over-
27 * written by a new Hessenberg matrix that is a perturbation of
28 * an orthogonal similarity transformation of H. It is to be
29 * hoped that the final version of H has many zero subdiagonal
30 * entries.
31 *
32 * ******************************************************************
33 * WANTT (input) LOGICAL
34 * If .TRUE., then the Hessenberg matrix H is fully updated
35 * so that the quasi-triangular Schur factor may be
36 * computed (in cooperation with the calling subroutine).
37 * If .FALSE., then only enough of H is updated to preserve
38 * the eigenvalues.
39 *
40 * WANTZ (input) LOGICAL
41 * If .TRUE., then the orthogonal matrix Z is updated so
42 * so that the orthogonal Schur factor may be computed
43 * (in cooperation with the calling subroutine).
44 * If .FALSE., then Z is not referenced.
45 *
46 * N (input) INTEGER
47 * The order of the matrix H and (if WANTZ is .TRUE.) the
48 * order of the orthogonal matrix Z.
49 *
50 * KTOP (input) INTEGER
51 * It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
52 * KBOT and KTOP together determine an isolated block
53 * along the diagonal of the Hessenberg matrix.
54 *
55 * KBOT (input) INTEGER
56 * It is assumed without a check that either
57 * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
58 * determine an isolated block along the diagonal of the
59 * Hessenberg matrix.
60 *
61 * NW (input) INTEGER
62 * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
63 *
64 * H (input/output) DOUBLE PRECISION array, dimension (LDH,N)
65 * On input the initial N-by-N section of H stores the
66 * Hessenberg matrix undergoing aggressive early deflation.
67 * On output H has been transformed by an orthogonal
68 * similarity transformation, perturbed, and the returned
69 * to Hessenberg form that (it is to be hoped) has some
70 * zero subdiagonal entries.
71 *
72 * LDH (input) integer
73 * Leading dimension of H just as declared in the calling
74 * subroutine. N .LE. LDH
75 *
76 * ILOZ (input) INTEGER
77 * IHIZ (input) INTEGER
78 * Specify the rows of Z to which transformations must be
79 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
80 *
81 * Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
82 * IF WANTZ is .TRUE., then on output, the orthogonal
83 * similarity transformation mentioned above has been
84 * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
85 * If WANTZ is .FALSE., then Z is unreferenced.
86 *
87 * LDZ (input) integer
88 * The leading dimension of Z just as declared in the
89 * calling subroutine. 1 .LE. LDZ.
90 *
91 * NS (output) integer
92 * The number of unconverged (ie approximate) eigenvalues
93 * returned in SR and SI that may be used as shifts by the
94 * calling subroutine.
95 *
96 * ND (output) integer
97 * The number of converged eigenvalues uncovered by this
98 * subroutine.
99 *
100 * SR (output) DOUBLE PRECISION array, dimension (KBOT)
101 * SI (output) DOUBLE PRECISION array, dimension (KBOT)
102 * On output, the real and imaginary parts of approximate
103 * eigenvalues that may be used for shifts are stored in
104 * SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
105 * SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
106 * The real and imaginary parts of converged eigenvalues
107 * are stored in SR(KBOT-ND+1) through SR(KBOT) and
108 * SI(KBOT-ND+1) through SI(KBOT), respectively.
109 *
110 * V (workspace) DOUBLE PRECISION array, dimension (LDV,NW)
111 * An NW-by-NW work array.
112 *
113 * LDV (input) integer scalar
114 * The leading dimension of V just as declared in the
115 * calling subroutine. NW .LE. LDV
116 *
117 * NH (input) integer scalar
118 * The number of columns of T. NH.GE.NW.
119 *
120 * T (workspace) DOUBLE PRECISION array, dimension (LDT,NW)
121 *
122 * LDT (input) integer
123 * The leading dimension of T just as declared in the
124 * calling subroutine. NW .LE. LDT
125 *
126 * NV (input) integer
127 * The number of rows of work array WV available for
128 * workspace. NV.GE.NW.
129 *
130 * WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)
131 *
132 * LDWV (input) integer
133 * The leading dimension of W just as declared in the
134 * calling subroutine. NW .LE. LDV
135 *
136 * WORK (workspace) DOUBLE PRECISION array, dimension (LWORK)
137 * On exit, WORK(1) is set to an estimate of the optimal value
138 * of LWORK for the given values of N, NW, KTOP and KBOT.
139 *
140 * LWORK (input) integer
141 * The dimension of the work array WORK. LWORK = 2*NW
142 * suffices, but greater efficiency may result from larger
143 * values of LWORK.
144 *
145 * If LWORK = -1, then a workspace query is assumed; DLAQR3
146 * only estimates the optimal workspace size for the given
147 * values of N, NW, KTOP and KBOT. The estimate is returned
148 * in WORK(1). No error message related to LWORK is issued
149 * by XERBLA. Neither H nor Z are accessed.
150 *
151 * ================================================================
152 * Based on contributions by
153 * Karen Braman and Ralph Byers, Department of Mathematics,
154 * University of Kansas, USA
155 *
156 * ================================================================
157 * .. Parameters ..
158 DOUBLE PRECISION ZERO, ONE
159 PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
160 * ..
161 * .. Local Scalars ..
162 DOUBLE PRECISION AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
163 $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
164 INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
165 $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
166 $ LWKOPT, NMIN
167 LOGICAL BULGE, SORTED
168 * ..
169 * .. External Functions ..
170 DOUBLE PRECISION DLAMCH
171 INTEGER ILAENV
172 EXTERNAL DLAMCH, ILAENV
173 * ..
174 * .. External Subroutines ..
175 EXTERNAL DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,
176 $ DLANV2, DLAQR4, DLARF, DLARFG, DLASET, DORMHR,
177 $ DTREXC
178 * ..
179 * .. Intrinsic Functions ..
180 INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT
181 * ..
182 * .. Executable Statements ..
183 *
184 * ==== Estimate optimal workspace. ====
185 *
186 JW = MIN( NW, KBOT-KTOP+1 )
187 IF( JW.LE.2 ) THEN
188 LWKOPT = 1
189 ELSE
190 *
191 * ==== Workspace query call to DGEHRD ====
192 *
193 CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
194 LWK1 = INT( WORK( 1 ) )
195 *
196 * ==== Workspace query call to DORMHR ====
197 *
198 CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
199 $ WORK, -1, INFO )
200 LWK2 = INT( WORK( 1 ) )
201 *
202 * ==== Workspace query call to DLAQR4 ====
203 *
204 CALL DLAQR4( .true., .true., JW, 1, JW, T, LDT, SR, SI, 1, JW,
205 $ V, LDV, WORK, -1, INFQR )
206 LWK3 = INT( WORK( 1 ) )
207 *
208 * ==== Optimal workspace ====
209 *
210 LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 )
211 END IF
212 *
213 * ==== Quick return in case of workspace query. ====
214 *
215 IF( LWORK.EQ.-1 ) THEN
216 WORK( 1 ) = DBLE( LWKOPT )
217 RETURN
218 END IF
219 *
220 * ==== Nothing to do ...
221 * ... for an empty active block ... ====
222 NS = 0
223 ND = 0
224 WORK( 1 ) = ONE
225 IF( KTOP.GT.KBOT )
226 $ RETURN
227 * ... nor for an empty deflation window. ====
228 IF( NW.LT.1 )
229 $ RETURN
230 *
231 * ==== Machine constants ====
232 *
233 SAFMIN = DLAMCH( 'SAFE MINIMUM' )
234 SAFMAX = ONE / SAFMIN
235 CALL DLABAD( SAFMIN, SAFMAX )
236 ULP = DLAMCH( 'PRECISION' )
237 SMLNUM = SAFMIN*( DBLE( N ) / ULP )
238 *
239 * ==== Setup deflation window ====
240 *
241 JW = MIN( NW, KBOT-KTOP+1 )
242 KWTOP = KBOT - JW + 1
243 IF( KWTOP.EQ.KTOP ) THEN
244 S = ZERO
245 ELSE
246 S = H( KWTOP, KWTOP-1 )
247 END IF
248 *
249 IF( KBOT.EQ.KWTOP ) THEN
250 *
251 * ==== 1-by-1 deflation window: not much to do ====
252 *
253 SR( KWTOP ) = H( KWTOP, KWTOP )
254 SI( KWTOP ) = ZERO
255 NS = 1
256 ND = 0
257 IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
258 $ THEN
259 NS = 0
260 ND = 1
261 IF( KWTOP.GT.KTOP )
262 $ H( KWTOP, KWTOP-1 ) = ZERO
263 END IF
264 WORK( 1 ) = ONE
265 RETURN
266 END IF
267 *
268 * ==== Convert to spike-triangular form. (In case of a
269 * . rare QR failure, this routine continues to do
270 * . aggressive early deflation using that part of
271 * . the deflation window that converged using INFQR
272 * . here and there to keep track.) ====
273 *
274 CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
275 CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
276 *
277 CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
278 NMIN = ILAENV( 12, 'DLAQR3', 'SV', JW, 1, JW, LWORK )
279 IF( JW.GT.NMIN ) THEN
280 CALL DLAQR4( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
281 $ SI( KWTOP ), 1, JW, V, LDV, WORK, LWORK, INFQR )
282 ELSE
283 CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
284 $ SI( KWTOP ), 1, JW, V, LDV, INFQR )
285 END IF
286 *
287 * ==== DTREXC needs a clean margin near the diagonal ====
288 *
289 DO 10 J = 1, JW - 3
290 T( J+2, J ) = ZERO
291 T( J+3, J ) = ZERO
292 10 CONTINUE
293 IF( JW.GT.2 )
294 $ T( JW, JW-2 ) = ZERO
295 *
296 * ==== Deflation detection loop ====
297 *
298 NS = JW
299 ILST = INFQR + 1
300 20 CONTINUE
301 IF( ILST.LE.NS ) THEN
302 IF( NS.EQ.1 ) THEN
303 BULGE = .FALSE.
304 ELSE
305 BULGE = T( NS, NS-1 ).NE.ZERO
306 END IF
307 *
308 * ==== Small spike tip test for deflation ====
309 *
310 IF( .NOT.BULGE ) THEN
311 *
312 * ==== Real eigenvalue ====
313 *
314 FOO = ABS( T( NS, NS ) )
315 IF( FOO.EQ.ZERO )
316 $ FOO = ABS( S )
317 IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
318 *
319 * ==== Deflatable ====
320 *
321 NS = NS - 1
322 ELSE
323 *
324 * ==== Undeflatable. Move it up out of the way.
325 * . (DTREXC can not fail in this case.) ====
326 *
327 IFST = NS
328 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
329 $ INFO )
330 ILST = ILST + 1
331 END IF
332 ELSE
333 *
334 * ==== Complex conjugate pair ====
335 *
336 FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*
337 $ SQRT( ABS( T( NS-1, NS ) ) )
338 IF( FOO.EQ.ZERO )
339 $ FOO = ABS( S )
340 IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
341 $ MAX( SMLNUM, ULP*FOO ) ) THEN
342 *
343 * ==== Deflatable ====
344 *
345 NS = NS - 2
346 ELSE
347 *
348 * ==== Undeflatable. Move them up out of the way.
349 * . Fortunately, DTREXC does the right thing with
350 * . ILST in case of a rare exchange failure. ====
351 *
352 IFST = NS
353 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
354 $ INFO )
355 ILST = ILST + 2
356 END IF
357 END IF
358 *
359 * ==== End deflation detection loop ====
360 *
361 GO TO 20
362 END IF
363 *
364 * ==== Return to Hessenberg form ====
365 *
366 IF( NS.EQ.0 )
367 $ S = ZERO
368 *
369 IF( NS.LT.JW ) THEN
370 *
371 * ==== sorting diagonal blocks of T improves accuracy for
372 * . graded matrices. Bubble sort deals well with
373 * . exchange failures. ====
374 *
375 SORTED = .false.
376 I = NS + 1
377 30 CONTINUE
378 IF( SORTED )
379 $ GO TO 50
380 SORTED = .true.
381 *
382 KEND = I - 1
383 I = INFQR + 1
384 IF( I.EQ.NS ) THEN
385 K = I + 1
386 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
387 K = I + 1
388 ELSE
389 K = I + 2
390 END IF
391 40 CONTINUE
392 IF( K.LE.KEND ) THEN
393 IF( K.EQ.I+1 ) THEN
394 EVI = ABS( T( I, I ) )
395 ELSE
396 EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*
397 $ SQRT( ABS( T( I, I+1 ) ) )
398 END IF
399 *
400 IF( K.EQ.KEND ) THEN
401 EVK = ABS( T( K, K ) )
402 ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
403 EVK = ABS( T( K, K ) )
404 ELSE
405 EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*
406 $ SQRT( ABS( T( K, K+1 ) ) )
407 END IF
408 *
409 IF( EVI.GE.EVK ) THEN
410 I = K
411 ELSE
412 SORTED = .false.
413 IFST = I
414 ILST = K
415 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
416 $ INFO )
417 IF( INFO.EQ.0 ) THEN
418 I = ILST
419 ELSE
420 I = K
421 END IF
422 END IF
423 IF( I.EQ.KEND ) THEN
424 K = I + 1
425 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
426 K = I + 1
427 ELSE
428 K = I + 2
429 END IF
430 GO TO 40
431 END IF
432 GO TO 30
433 50 CONTINUE
434 END IF
435 *
436 * ==== Restore shift/eigenvalue array from T ====
437 *
438 I = JW
439 60 CONTINUE
440 IF( I.GE.INFQR+1 ) THEN
441 IF( I.EQ.INFQR+1 ) THEN
442 SR( KWTOP+I-1 ) = T( I, I )
443 SI( KWTOP+I-1 ) = ZERO
444 I = I - 1
445 ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
446 SR( KWTOP+I-1 ) = T( I, I )
447 SI( KWTOP+I-1 ) = ZERO
448 I = I - 1
449 ELSE
450 AA = T( I-1, I-1 )
451 CC = T( I, I-1 )
452 BB = T( I-1, I )
453 DD = T( I, I )
454 CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
455 $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
456 $ SI( KWTOP+I-1 ), CS, SN )
457 I = I - 2
458 END IF
459 GO TO 60
460 END IF
461 *
462 IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
463 IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
464 *
465 * ==== Reflect spike back into lower triangle ====
466 *
467 CALL DCOPY( NS, V, LDV, WORK, 1 )
468 BETA = WORK( 1 )
469 CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )
470 WORK( 1 ) = ONE
471 *
472 CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
473 *
474 CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
475 $ WORK( JW+1 ) )
476 CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
477 $ WORK( JW+1 ) )
478 CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
479 $ WORK( JW+1 ) )
480 *
481 CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
482 $ LWORK-JW, INFO )
483 END IF
484 *
485 * ==== Copy updated reduced window into place ====
486 *
487 IF( KWTOP.GT.1 )
488 $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )
489 CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
490 CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
491 $ LDH+1 )
492 *
493 * ==== Accumulate orthogonal matrix in order update
494 * . H and Z, if requested. ====
495 *
496 IF( NS.GT.1 .AND. S.NE.ZERO )
497 $ CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
498 $ WORK( JW+1 ), LWORK-JW, INFO )
499 *
500 * ==== Update vertical slab in H ====
501 *
502 IF( WANTT ) THEN
503 LTOP = 1
504 ELSE
505 LTOP = KTOP
506 END IF
507 DO 70 KROW = LTOP, KWTOP - 1, NV
508 KLN = MIN( NV, KWTOP-KROW )
509 CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
510 $ LDH, V, LDV, ZERO, WV, LDWV )
511 CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
512 70 CONTINUE
513 *
514 * ==== Update horizontal slab in H ====
515 *
516 IF( WANTT ) THEN
517 DO 80 KCOL = KBOT + 1, N, NH
518 KLN = MIN( NH, N-KCOL+1 )
519 CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
520 $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
521 CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
522 $ LDH )
523 80 CONTINUE
524 END IF
525 *
526 * ==== Update vertical slab in Z ====
527 *
528 IF( WANTZ ) THEN
529 DO 90 KROW = ILOZ, IHIZ, NV
530 KLN = MIN( NV, IHIZ-KROW+1 )
531 CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
532 $ LDZ, V, LDV, ZERO, WV, LDWV )
533 CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
534 $ LDZ )
535 90 CONTINUE
536 END IF
537 END IF
538 *
539 * ==== Return the number of deflations ... ====
540 *
541 ND = JW - NS
542 *
543 * ==== ... and the number of shifts. (Subtracting
544 * . INFQR from the spike length takes care
545 * . of the case of a rare QR failure while
546 * . calculating eigenvalues of the deflation
547 * . window.) ====
548 *
549 NS = NS - INFQR
550 *
551 * ==== Return optimal workspace. ====
552 *
553 WORK( 1 ) = DBLE( LWKOPT )
554 *
555 * ==== End of DLAQR3 ====
556 *
557 END
2 $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
3 $ LDT, NV, WV, LDWV, WORK, LWORK )
4 *
5 * -- LAPACK auxiliary routine (version 3.2.2) --
6 * Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..
7 * -- June 2010 --
8 *
9 * .. Scalar Arguments ..
10 INTEGER IHIZ, ILOZ, KBOT, KTOP, LDH, LDT, LDV, LDWV,
11 $ LDZ, LWORK, N, ND, NH, NS, NV, NW
12 LOGICAL WANTT, WANTZ
13 * ..
14 * .. Array Arguments ..
15 DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), T( LDT, * ),
16 $ V( LDV, * ), WORK( * ), WV( LDWV, * ),
17 $ Z( LDZ, * )
18 * ..
19 *
20 * ******************************************************************
21 * Aggressive early deflation:
22 *
23 * This subroutine accepts as input an upper Hessenberg matrix
24 * H and performs an orthogonal similarity transformation
25 * designed to detect and deflate fully converged eigenvalues from
26 * a trailing principal submatrix. On output H has been over-
27 * written by a new Hessenberg matrix that is a perturbation of
28 * an orthogonal similarity transformation of H. It is to be
29 * hoped that the final version of H has many zero subdiagonal
30 * entries.
31 *
32 * ******************************************************************
33 * WANTT (input) LOGICAL
34 * If .TRUE., then the Hessenberg matrix H is fully updated
35 * so that the quasi-triangular Schur factor may be
36 * computed (in cooperation with the calling subroutine).
37 * If .FALSE., then only enough of H is updated to preserve
38 * the eigenvalues.
39 *
40 * WANTZ (input) LOGICAL
41 * If .TRUE., then the orthogonal matrix Z is updated so
42 * so that the orthogonal Schur factor may be computed
43 * (in cooperation with the calling subroutine).
44 * If .FALSE., then Z is not referenced.
45 *
46 * N (input) INTEGER
47 * The order of the matrix H and (if WANTZ is .TRUE.) the
48 * order of the orthogonal matrix Z.
49 *
50 * KTOP (input) INTEGER
51 * It is assumed that either KTOP = 1 or H(KTOP,KTOP-1)=0.
52 * KBOT and KTOP together determine an isolated block
53 * along the diagonal of the Hessenberg matrix.
54 *
55 * KBOT (input) INTEGER
56 * It is assumed without a check that either
57 * KBOT = N or H(KBOT+1,KBOT)=0. KBOT and KTOP together
58 * determine an isolated block along the diagonal of the
59 * Hessenberg matrix.
60 *
61 * NW (input) INTEGER
62 * Deflation window size. 1 .LE. NW .LE. (KBOT-KTOP+1).
63 *
64 * H (input/output) DOUBLE PRECISION array, dimension (LDH,N)
65 * On input the initial N-by-N section of H stores the
66 * Hessenberg matrix undergoing aggressive early deflation.
67 * On output H has been transformed by an orthogonal
68 * similarity transformation, perturbed, and the returned
69 * to Hessenberg form that (it is to be hoped) has some
70 * zero subdiagonal entries.
71 *
72 * LDH (input) integer
73 * Leading dimension of H just as declared in the calling
74 * subroutine. N .LE. LDH
75 *
76 * ILOZ (input) INTEGER
77 * IHIZ (input) INTEGER
78 * Specify the rows of Z to which transformations must be
79 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N.
80 *
81 * Z (input/output) DOUBLE PRECISION array, dimension (LDZ,N)
82 * IF WANTZ is .TRUE., then on output, the orthogonal
83 * similarity transformation mentioned above has been
84 * accumulated into Z(ILOZ:IHIZ,ILO:IHI) from the right.
85 * If WANTZ is .FALSE., then Z is unreferenced.
86 *
87 * LDZ (input) integer
88 * The leading dimension of Z just as declared in the
89 * calling subroutine. 1 .LE. LDZ.
90 *
91 * NS (output) integer
92 * The number of unconverged (ie approximate) eigenvalues
93 * returned in SR and SI that may be used as shifts by the
94 * calling subroutine.
95 *
96 * ND (output) integer
97 * The number of converged eigenvalues uncovered by this
98 * subroutine.
99 *
100 * SR (output) DOUBLE PRECISION array, dimension (KBOT)
101 * SI (output) DOUBLE PRECISION array, dimension (KBOT)
102 * On output, the real and imaginary parts of approximate
103 * eigenvalues that may be used for shifts are stored in
104 * SR(KBOT-ND-NS+1) through SR(KBOT-ND) and
105 * SI(KBOT-ND-NS+1) through SI(KBOT-ND), respectively.
106 * The real and imaginary parts of converged eigenvalues
107 * are stored in SR(KBOT-ND+1) through SR(KBOT) and
108 * SI(KBOT-ND+1) through SI(KBOT), respectively.
109 *
110 * V (workspace) DOUBLE PRECISION array, dimension (LDV,NW)
111 * An NW-by-NW work array.
112 *
113 * LDV (input) integer scalar
114 * The leading dimension of V just as declared in the
115 * calling subroutine. NW .LE. LDV
116 *
117 * NH (input) integer scalar
118 * The number of columns of T. NH.GE.NW.
119 *
120 * T (workspace) DOUBLE PRECISION array, dimension (LDT,NW)
121 *
122 * LDT (input) integer
123 * The leading dimension of T just as declared in the
124 * calling subroutine. NW .LE. LDT
125 *
126 * NV (input) integer
127 * The number of rows of work array WV available for
128 * workspace. NV.GE.NW.
129 *
130 * WV (workspace) DOUBLE PRECISION array, dimension (LDWV,NW)
131 *
132 * LDWV (input) integer
133 * The leading dimension of W just as declared in the
134 * calling subroutine. NW .LE. LDV
135 *
136 * WORK (workspace) DOUBLE PRECISION array, dimension (LWORK)
137 * On exit, WORK(1) is set to an estimate of the optimal value
138 * of LWORK for the given values of N, NW, KTOP and KBOT.
139 *
140 * LWORK (input) integer
141 * The dimension of the work array WORK. LWORK = 2*NW
142 * suffices, but greater efficiency may result from larger
143 * values of LWORK.
144 *
145 * If LWORK = -1, then a workspace query is assumed; DLAQR3
146 * only estimates the optimal workspace size for the given
147 * values of N, NW, KTOP and KBOT. The estimate is returned
148 * in WORK(1). No error message related to LWORK is issued
149 * by XERBLA. Neither H nor Z are accessed.
150 *
151 * ================================================================
152 * Based on contributions by
153 * Karen Braman and Ralph Byers, Department of Mathematics,
154 * University of Kansas, USA
155 *
156 * ================================================================
157 * .. Parameters ..
158 DOUBLE PRECISION ZERO, ONE
159 PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
160 * ..
161 * .. Local Scalars ..
162 DOUBLE PRECISION AA, BB, BETA, CC, CS, DD, EVI, EVK, FOO, S,
163 $ SAFMAX, SAFMIN, SMLNUM, SN, TAU, ULP
164 INTEGER I, IFST, ILST, INFO, INFQR, J, JW, K, KCOL,
165 $ KEND, KLN, KROW, KWTOP, LTOP, LWK1, LWK2, LWK3,
166 $ LWKOPT, NMIN
167 LOGICAL BULGE, SORTED
168 * ..
169 * .. External Functions ..
170 DOUBLE PRECISION DLAMCH
171 INTEGER ILAENV
172 EXTERNAL DLAMCH, ILAENV
173 * ..
174 * .. External Subroutines ..
175 EXTERNAL DCOPY, DGEHRD, DGEMM, DLABAD, DLACPY, DLAHQR,
176 $ DLANV2, DLAQR4, DLARF, DLARFG, DLASET, DORMHR,
177 $ DTREXC
178 * ..
179 * .. Intrinsic Functions ..
180 INTRINSIC ABS, DBLE, INT, MAX, MIN, SQRT
181 * ..
182 * .. Executable Statements ..
183 *
184 * ==== Estimate optimal workspace. ====
185 *
186 JW = MIN( NW, KBOT-KTOP+1 )
187 IF( JW.LE.2 ) THEN
188 LWKOPT = 1
189 ELSE
190 *
191 * ==== Workspace query call to DGEHRD ====
192 *
193 CALL DGEHRD( JW, 1, JW-1, T, LDT, WORK, WORK, -1, INFO )
194 LWK1 = INT( WORK( 1 ) )
195 *
196 * ==== Workspace query call to DORMHR ====
197 *
198 CALL DORMHR( 'R', 'N', JW, JW, 1, JW-1, T, LDT, WORK, V, LDV,
199 $ WORK, -1, INFO )
200 LWK2 = INT( WORK( 1 ) )
201 *
202 * ==== Workspace query call to DLAQR4 ====
203 *
204 CALL DLAQR4( .true., .true., JW, 1, JW, T, LDT, SR, SI, 1, JW,
205 $ V, LDV, WORK, -1, INFQR )
206 LWK3 = INT( WORK( 1 ) )
207 *
208 * ==== Optimal workspace ====
209 *
210 LWKOPT = MAX( JW+MAX( LWK1, LWK2 ), LWK3 )
211 END IF
212 *
213 * ==== Quick return in case of workspace query. ====
214 *
215 IF( LWORK.EQ.-1 ) THEN
216 WORK( 1 ) = DBLE( LWKOPT )
217 RETURN
218 END IF
219 *
220 * ==== Nothing to do ...
221 * ... for an empty active block ... ====
222 NS = 0
223 ND = 0
224 WORK( 1 ) = ONE
225 IF( KTOP.GT.KBOT )
226 $ RETURN
227 * ... nor for an empty deflation window. ====
228 IF( NW.LT.1 )
229 $ RETURN
230 *
231 * ==== Machine constants ====
232 *
233 SAFMIN = DLAMCH( 'SAFE MINIMUM' )
234 SAFMAX = ONE / SAFMIN
235 CALL DLABAD( SAFMIN, SAFMAX )
236 ULP = DLAMCH( 'PRECISION' )
237 SMLNUM = SAFMIN*( DBLE( N ) / ULP )
238 *
239 * ==== Setup deflation window ====
240 *
241 JW = MIN( NW, KBOT-KTOP+1 )
242 KWTOP = KBOT - JW + 1
243 IF( KWTOP.EQ.KTOP ) THEN
244 S = ZERO
245 ELSE
246 S = H( KWTOP, KWTOP-1 )
247 END IF
248 *
249 IF( KBOT.EQ.KWTOP ) THEN
250 *
251 * ==== 1-by-1 deflation window: not much to do ====
252 *
253 SR( KWTOP ) = H( KWTOP, KWTOP )
254 SI( KWTOP ) = ZERO
255 NS = 1
256 ND = 0
257 IF( ABS( S ).LE.MAX( SMLNUM, ULP*ABS( H( KWTOP, KWTOP ) ) ) )
258 $ THEN
259 NS = 0
260 ND = 1
261 IF( KWTOP.GT.KTOP )
262 $ H( KWTOP, KWTOP-1 ) = ZERO
263 END IF
264 WORK( 1 ) = ONE
265 RETURN
266 END IF
267 *
268 * ==== Convert to spike-triangular form. (In case of a
269 * . rare QR failure, this routine continues to do
270 * . aggressive early deflation using that part of
271 * . the deflation window that converged using INFQR
272 * . here and there to keep track.) ====
273 *
274 CALL DLACPY( 'U', JW, JW, H( KWTOP, KWTOP ), LDH, T, LDT )
275 CALL DCOPY( JW-1, H( KWTOP+1, KWTOP ), LDH+1, T( 2, 1 ), LDT+1 )
276 *
277 CALL DLASET( 'A', JW, JW, ZERO, ONE, V, LDV )
278 NMIN = ILAENV( 12, 'DLAQR3', 'SV', JW, 1, JW, LWORK )
279 IF( JW.GT.NMIN ) THEN
280 CALL DLAQR4( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
281 $ SI( KWTOP ), 1, JW, V, LDV, WORK, LWORK, INFQR )
282 ELSE
283 CALL DLAHQR( .true., .true., JW, 1, JW, T, LDT, SR( KWTOP ),
284 $ SI( KWTOP ), 1, JW, V, LDV, INFQR )
285 END IF
286 *
287 * ==== DTREXC needs a clean margin near the diagonal ====
288 *
289 DO 10 J = 1, JW - 3
290 T( J+2, J ) = ZERO
291 T( J+3, J ) = ZERO
292 10 CONTINUE
293 IF( JW.GT.2 )
294 $ T( JW, JW-2 ) = ZERO
295 *
296 * ==== Deflation detection loop ====
297 *
298 NS = JW
299 ILST = INFQR + 1
300 20 CONTINUE
301 IF( ILST.LE.NS ) THEN
302 IF( NS.EQ.1 ) THEN
303 BULGE = .FALSE.
304 ELSE
305 BULGE = T( NS, NS-1 ).NE.ZERO
306 END IF
307 *
308 * ==== Small spike tip test for deflation ====
309 *
310 IF( .NOT.BULGE ) THEN
311 *
312 * ==== Real eigenvalue ====
313 *
314 FOO = ABS( T( NS, NS ) )
315 IF( FOO.EQ.ZERO )
316 $ FOO = ABS( S )
317 IF( ABS( S*V( 1, NS ) ).LE.MAX( SMLNUM, ULP*FOO ) ) THEN
318 *
319 * ==== Deflatable ====
320 *
321 NS = NS - 1
322 ELSE
323 *
324 * ==== Undeflatable. Move it up out of the way.
325 * . (DTREXC can not fail in this case.) ====
326 *
327 IFST = NS
328 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
329 $ INFO )
330 ILST = ILST + 1
331 END IF
332 ELSE
333 *
334 * ==== Complex conjugate pair ====
335 *
336 FOO = ABS( T( NS, NS ) ) + SQRT( ABS( T( NS, NS-1 ) ) )*
337 $ SQRT( ABS( T( NS-1, NS ) ) )
338 IF( FOO.EQ.ZERO )
339 $ FOO = ABS( S )
340 IF( MAX( ABS( S*V( 1, NS ) ), ABS( S*V( 1, NS-1 ) ) ).LE.
341 $ MAX( SMLNUM, ULP*FOO ) ) THEN
342 *
343 * ==== Deflatable ====
344 *
345 NS = NS - 2
346 ELSE
347 *
348 * ==== Undeflatable. Move them up out of the way.
349 * . Fortunately, DTREXC does the right thing with
350 * . ILST in case of a rare exchange failure. ====
351 *
352 IFST = NS
353 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
354 $ INFO )
355 ILST = ILST + 2
356 END IF
357 END IF
358 *
359 * ==== End deflation detection loop ====
360 *
361 GO TO 20
362 END IF
363 *
364 * ==== Return to Hessenberg form ====
365 *
366 IF( NS.EQ.0 )
367 $ S = ZERO
368 *
369 IF( NS.LT.JW ) THEN
370 *
371 * ==== sorting diagonal blocks of T improves accuracy for
372 * . graded matrices. Bubble sort deals well with
373 * . exchange failures. ====
374 *
375 SORTED = .false.
376 I = NS + 1
377 30 CONTINUE
378 IF( SORTED )
379 $ GO TO 50
380 SORTED = .true.
381 *
382 KEND = I - 1
383 I = INFQR + 1
384 IF( I.EQ.NS ) THEN
385 K = I + 1
386 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
387 K = I + 1
388 ELSE
389 K = I + 2
390 END IF
391 40 CONTINUE
392 IF( K.LE.KEND ) THEN
393 IF( K.EQ.I+1 ) THEN
394 EVI = ABS( T( I, I ) )
395 ELSE
396 EVI = ABS( T( I, I ) ) + SQRT( ABS( T( I+1, I ) ) )*
397 $ SQRT( ABS( T( I, I+1 ) ) )
398 END IF
399 *
400 IF( K.EQ.KEND ) THEN
401 EVK = ABS( T( K, K ) )
402 ELSE IF( T( K+1, K ).EQ.ZERO ) THEN
403 EVK = ABS( T( K, K ) )
404 ELSE
405 EVK = ABS( T( K, K ) ) + SQRT( ABS( T( K+1, K ) ) )*
406 $ SQRT( ABS( T( K, K+1 ) ) )
407 END IF
408 *
409 IF( EVI.GE.EVK ) THEN
410 I = K
411 ELSE
412 SORTED = .false.
413 IFST = I
414 ILST = K
415 CALL DTREXC( 'V', JW, T, LDT, V, LDV, IFST, ILST, WORK,
416 $ INFO )
417 IF( INFO.EQ.0 ) THEN
418 I = ILST
419 ELSE
420 I = K
421 END IF
422 END IF
423 IF( I.EQ.KEND ) THEN
424 K = I + 1
425 ELSE IF( T( I+1, I ).EQ.ZERO ) THEN
426 K = I + 1
427 ELSE
428 K = I + 2
429 END IF
430 GO TO 40
431 END IF
432 GO TO 30
433 50 CONTINUE
434 END IF
435 *
436 * ==== Restore shift/eigenvalue array from T ====
437 *
438 I = JW
439 60 CONTINUE
440 IF( I.GE.INFQR+1 ) THEN
441 IF( I.EQ.INFQR+1 ) THEN
442 SR( KWTOP+I-1 ) = T( I, I )
443 SI( KWTOP+I-1 ) = ZERO
444 I = I - 1
445 ELSE IF( T( I, I-1 ).EQ.ZERO ) THEN
446 SR( KWTOP+I-1 ) = T( I, I )
447 SI( KWTOP+I-1 ) = ZERO
448 I = I - 1
449 ELSE
450 AA = T( I-1, I-1 )
451 CC = T( I, I-1 )
452 BB = T( I-1, I )
453 DD = T( I, I )
454 CALL DLANV2( AA, BB, CC, DD, SR( KWTOP+I-2 ),
455 $ SI( KWTOP+I-2 ), SR( KWTOP+I-1 ),
456 $ SI( KWTOP+I-1 ), CS, SN )
457 I = I - 2
458 END IF
459 GO TO 60
460 END IF
461 *
462 IF( NS.LT.JW .OR. S.EQ.ZERO ) THEN
463 IF( NS.GT.1 .AND. S.NE.ZERO ) THEN
464 *
465 * ==== Reflect spike back into lower triangle ====
466 *
467 CALL DCOPY( NS, V, LDV, WORK, 1 )
468 BETA = WORK( 1 )
469 CALL DLARFG( NS, BETA, WORK( 2 ), 1, TAU )
470 WORK( 1 ) = ONE
471 *
472 CALL DLASET( 'L', JW-2, JW-2, ZERO, ZERO, T( 3, 1 ), LDT )
473 *
474 CALL DLARF( 'L', NS, JW, WORK, 1, TAU, T, LDT,
475 $ WORK( JW+1 ) )
476 CALL DLARF( 'R', NS, NS, WORK, 1, TAU, T, LDT,
477 $ WORK( JW+1 ) )
478 CALL DLARF( 'R', JW, NS, WORK, 1, TAU, V, LDV,
479 $ WORK( JW+1 ) )
480 *
481 CALL DGEHRD( JW, 1, NS, T, LDT, WORK, WORK( JW+1 ),
482 $ LWORK-JW, INFO )
483 END IF
484 *
485 * ==== Copy updated reduced window into place ====
486 *
487 IF( KWTOP.GT.1 )
488 $ H( KWTOP, KWTOP-1 ) = S*V( 1, 1 )
489 CALL DLACPY( 'U', JW, JW, T, LDT, H( KWTOP, KWTOP ), LDH )
490 CALL DCOPY( JW-1, T( 2, 1 ), LDT+1, H( KWTOP+1, KWTOP ),
491 $ LDH+1 )
492 *
493 * ==== Accumulate orthogonal matrix in order update
494 * . H and Z, if requested. ====
495 *
496 IF( NS.GT.1 .AND. S.NE.ZERO )
497 $ CALL DORMHR( 'R', 'N', JW, NS, 1, NS, T, LDT, WORK, V, LDV,
498 $ WORK( JW+1 ), LWORK-JW, INFO )
499 *
500 * ==== Update vertical slab in H ====
501 *
502 IF( WANTT ) THEN
503 LTOP = 1
504 ELSE
505 LTOP = KTOP
506 END IF
507 DO 70 KROW = LTOP, KWTOP - 1, NV
508 KLN = MIN( NV, KWTOP-KROW )
509 CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, H( KROW, KWTOP ),
510 $ LDH, V, LDV, ZERO, WV, LDWV )
511 CALL DLACPY( 'A', KLN, JW, WV, LDWV, H( KROW, KWTOP ), LDH )
512 70 CONTINUE
513 *
514 * ==== Update horizontal slab in H ====
515 *
516 IF( WANTT ) THEN
517 DO 80 KCOL = KBOT + 1, N, NH
518 KLN = MIN( NH, N-KCOL+1 )
519 CALL DGEMM( 'C', 'N', JW, KLN, JW, ONE, V, LDV,
520 $ H( KWTOP, KCOL ), LDH, ZERO, T, LDT )
521 CALL DLACPY( 'A', JW, KLN, T, LDT, H( KWTOP, KCOL ),
522 $ LDH )
523 80 CONTINUE
524 END IF
525 *
526 * ==== Update vertical slab in Z ====
527 *
528 IF( WANTZ ) THEN
529 DO 90 KROW = ILOZ, IHIZ, NV
530 KLN = MIN( NV, IHIZ-KROW+1 )
531 CALL DGEMM( 'N', 'N', KLN, JW, JW, ONE, Z( KROW, KWTOP ),
532 $ LDZ, V, LDV, ZERO, WV, LDWV )
533 CALL DLACPY( 'A', KLN, JW, WV, LDWV, Z( KROW, KWTOP ),
534 $ LDZ )
535 90 CONTINUE
536 END IF
537 END IF
538 *
539 * ==== Return the number of deflations ... ====
540 *
541 ND = JW - NS
542 *
543 * ==== ... and the number of shifts. (Subtracting
544 * . INFQR from the spike length takes care
545 * . of the case of a rare QR failure while
546 * . calculating eigenvalues of the deflation
547 * . window.) ====
548 *
549 NS = NS - INFQR
550 *
551 * ==== Return optimal workspace. ====
552 *
553 WORK( 1 ) = DBLE( LWKOPT )
554 *
555 * ==== End of DLAQR3 ====
556 *
557 END