1 SUBROUTINE DLAQR5( WANTT, WANTZ, KACC22, N, KTOP, KBOT, NSHFTS,
2 $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
3 $ LDU, NV, WV, LDWV, NH, WH, LDWH )
4 *
5 * -- LAPACK auxiliary routine (version 3.3.0) --
6 * -- LAPACK is a software package provided by Univ. of Tennessee, --
7 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
8 * November 2010
9 *
10 * .. Scalar Arguments ..
11 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
12 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
13 LOGICAL WANTT, WANTZ
14 * ..
15 * .. Array Arguments ..
16 DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
17 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
18 $ Z( LDZ, * )
19 * ..
20 *
21 * This auxiliary subroutine called by DLAQR0 performs a
22 * single small-bulge multi-shift QR sweep.
23 *
24 * WANTT (input) logical scalar
25 * WANTT = .true. if the quasi-triangular Schur factor
26 * is being computed. WANTT is set to .false. otherwise.
27 *
28 * WANTZ (input) logical scalar
29 * WANTZ = .true. if the orthogonal Schur factor is being
30 * computed. WANTZ is set to .false. otherwise.
31 *
32 * KACC22 (input) integer with value 0, 1, or 2.
33 * Specifies the computation mode of far-from-diagonal
34 * orthogonal updates.
35 * = 0: DLAQR5 does not accumulate reflections and does not
36 * use matrix-matrix multiply to update far-from-diagonal
37 * matrix entries.
38 * = 1: DLAQR5 accumulates reflections and uses matrix-matrix
39 * multiply to update the far-from-diagonal matrix entries.
40 * = 2: DLAQR5 accumulates reflections, uses matrix-matrix
41 * multiply to update the far-from-diagonal matrix entries,
42 * and takes advantage of 2-by-2 block structure during
43 * matrix multiplies.
44 *
45 * N (input) integer scalar
46 * N is the order of the Hessenberg matrix H upon which this
47 * subroutine operates.
48 *
49 * KTOP (input) integer scalar
50 * KBOT (input) integer scalar
51 * These are the first and last rows and columns of an
52 * isolated diagonal block upon which the QR sweep is to be
53 * applied. It is assumed without a check that
54 * either KTOP = 1 or H(KTOP,KTOP-1) = 0
55 * and
56 * either KBOT = N or H(KBOT+1,KBOT) = 0.
57 *
58 * NSHFTS (input) integer scalar
59 * NSHFTS gives the number of simultaneous shifts. NSHFTS
60 * must be positive and even.
61 *
62 * SR (input/output) DOUBLE PRECISION array of size (NSHFTS)
63 * SI (input/output) DOUBLE PRECISION array of size (NSHFTS)
64 * SR contains the real parts and SI contains the imaginary
65 * parts of the NSHFTS shifts of origin that define the
66 * multi-shift QR sweep. On output SR and SI may be
67 * reordered.
68 *
69 * H (input/output) DOUBLE PRECISION array of size (LDH,N)
70 * On input H contains a Hessenberg matrix. On output a
71 * multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
72 * to the isolated diagonal block in rows and columns KTOP
73 * through KBOT.
74 *
75 * LDH (input) integer scalar
76 * LDH is the leading dimension of H just as declared in the
77 * calling procedure. LDH.GE.MAX(1,N).
78 *
79 * ILOZ (input) INTEGER
80 * IHIZ (input) INTEGER
81 * Specify the rows of Z to which transformations must be
82 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
83 *
84 * Z (input/output) DOUBLE PRECISION array of size (LDZ,IHI)
85 * If WANTZ = .TRUE., then the QR Sweep orthogonal
86 * similarity transformation is accumulated into
87 * Z(ILOZ:IHIZ,ILO:IHI) from the right.
88 * If WANTZ = .FALSE., then Z is unreferenced.
89 *
90 * LDZ (input) integer scalar
91 * LDA is the leading dimension of Z just as declared in
92 * the calling procedure. LDZ.GE.N.
93 *
94 * V (workspace) DOUBLE PRECISION array of size (LDV,NSHFTS/2)
95 *
96 * LDV (input) integer scalar
97 * LDV is the leading dimension of V as declared in the
98 * calling procedure. LDV.GE.3.
99 *
100 * U (workspace) DOUBLE PRECISION array of size
101 * (LDU,3*NSHFTS-3)
102 *
103 * LDU (input) integer scalar
104 * LDU is the leading dimension of U just as declared in the
105 * in the calling subroutine. LDU.GE.3*NSHFTS-3.
106 *
107 * NH (input) integer scalar
108 * NH is the number of columns in array WH available for
109 * workspace. NH.GE.1.
110 *
111 * WH (workspace) DOUBLE PRECISION array of size (LDWH,NH)
112 *
113 * LDWH (input) integer scalar
114 * Leading dimension of WH just as declared in the
115 * calling procedure. LDWH.GE.3*NSHFTS-3.
116 *
117 * NV (input) integer scalar
118 * NV is the number of rows in WV agailable for workspace.
119 * NV.GE.1.
120 *
121 * WV (workspace) DOUBLE PRECISION array of size
122 * (LDWV,3*NSHFTS-3)
123 *
124 * LDWV (input) integer scalar
125 * LDWV is the leading dimension of WV as declared in the
126 * in the calling subroutine. LDWV.GE.NV.
127 *
128 * ================================================================
129 * Based on contributions by
130 * Karen Braman and Ralph Byers, Department of Mathematics,
131 * University of Kansas, USA
132 *
133 * ================================================================
134 * Reference:
135 *
136 * K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
137 * Algorithm Part I: Maintaining Well Focused Shifts, and
138 * Level 3 Performance, SIAM Journal of Matrix Analysis,
139 * volume 23, pages 929--947, 2002.
140 *
141 * ================================================================
142 * .. Parameters ..
143 DOUBLE PRECISION ZERO, ONE
144 PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
145 * ..
146 * .. Local Scalars ..
147 DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM,
148 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
149 $ ULP
150 INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
151 $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
152 $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
153 $ NS, NU
154 LOGICAL ACCUM, BLK22, BMP22
155 * ..
156 * .. External Functions ..
157 DOUBLE PRECISION DLAMCH
158 EXTERNAL DLAMCH
159 * ..
160 * .. Intrinsic Functions ..
161 *
162 INTRINSIC ABS, DBLE, MAX, MIN, MOD
163 * ..
164 * .. Local Arrays ..
165 DOUBLE PRECISION VT( 3 )
166 * ..
167 * .. External Subroutines ..
168 EXTERNAL DGEMM, DLABAD, DLACPY, DLAQR1, DLARFG, DLASET,
169 $ DTRMM
170 * ..
171 * .. Executable Statements ..
172 *
173 * ==== If there are no shifts, then there is nothing to do. ====
174 *
175 IF( NSHFTS.LT.2 )
176 $ RETURN
177 *
178 * ==== If the active block is empty or 1-by-1, then there
179 * . is nothing to do. ====
180 *
181 IF( KTOP.GE.KBOT )
182 $ RETURN
183 *
184 * ==== Shuffle shifts into pairs of real shifts and pairs
185 * . of complex conjugate shifts assuming complex
186 * . conjugate shifts are already adjacent to one
187 * . another. ====
188 *
189 DO 10 I = 1, NSHFTS - 2, 2
190 IF( SI( I ).NE.-SI( I+1 ) ) THEN
191 *
192 SWAP = SR( I )
193 SR( I ) = SR( I+1 )
194 SR( I+1 ) = SR( I+2 )
195 SR( I+2 ) = SWAP
196 *
197 SWAP = SI( I )
198 SI( I ) = SI( I+1 )
199 SI( I+1 ) = SI( I+2 )
200 SI( I+2 ) = SWAP
201 END IF
202 10 CONTINUE
203 *
204 * ==== NSHFTS is supposed to be even, but if it is odd,
205 * . then simply reduce it by one. The shuffle above
206 * . ensures that the dropped shift is real and that
207 * . the remaining shifts are paired. ====
208 *
209 NS = NSHFTS - MOD( NSHFTS, 2 )
210 *
211 * ==== Machine constants for deflation ====
212 *
213 SAFMIN = DLAMCH( 'SAFE MINIMUM' )
214 SAFMAX = ONE / SAFMIN
215 CALL DLABAD( SAFMIN, SAFMAX )
216 ULP = DLAMCH( 'PRECISION' )
217 SMLNUM = SAFMIN*( DBLE( N ) / ULP )
218 *
219 * ==== Use accumulated reflections to update far-from-diagonal
220 * . entries ? ====
221 *
222 ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
223 *
224 * ==== If so, exploit the 2-by-2 block structure? ====
225 *
226 BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 )
227 *
228 * ==== clear trash ====
229 *
230 IF( KTOP+2.LE.KBOT )
231 $ H( KTOP+2, KTOP ) = ZERO
232 *
233 * ==== NBMPS = number of 2-shift bulges in the chain ====
234 *
235 NBMPS = NS / 2
236 *
237 * ==== KDU = width of slab ====
238 *
239 KDU = 6*NBMPS - 3
240 *
241 * ==== Create and chase chains of NBMPS bulges ====
242 *
243 DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2
244 NDCOL = INCOL + KDU
245 IF( ACCUM )
246 $ CALL DLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
247 *
248 * ==== Near-the-diagonal bulge chase. The following loop
249 * . performs the near-the-diagonal part of a small bulge
250 * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
251 * . chunk extends from column INCOL to column NDCOL
252 * . (including both column INCOL and column NDCOL). The
253 * . following loop chases a 3*NBMPS column long chain of
254 * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
255 * . may be less than KTOP and and NDCOL may be greater than
256 * . KBOT indicating phantom columns from which to chase
257 * . bulges before they are actually introduced or to which
258 * . to chase bulges beyond column KBOT.) ====
259 *
260 DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 )
261 *
262 * ==== Bulges number MTOP to MBOT are active double implicit
263 * . shift bulges. There may or may not also be small
264 * . 2-by-2 bulge, if there is room. The inactive bulges
265 * . (if any) must wait until the active bulges have moved
266 * . down the diagonal to make room. The phantom matrix
267 * . paradigm described above helps keep track. ====
268 *
269 MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 )
270 MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 )
271 M22 = MBOT + 1
272 BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ.
273 $ ( KBOT-2 )
274 *
275 * ==== Generate reflections to chase the chain right
276 * . one column. (The minimum value of K is KTOP-1.) ====
277 *
278 DO 20 M = MTOP, MBOT
279 K = KRCOL + 3*( M-1 )
280 IF( K.EQ.KTOP-1 ) THEN
281 CALL DLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ),
282 $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
283 $ V( 1, M ) )
284 ALPHA = V( 1, M )
285 CALL DLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
286 ELSE
287 BETA = H( K+1, K )
288 V( 2, M ) = H( K+2, K )
289 V( 3, M ) = H( K+3, K )
290 CALL DLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
291 *
292 * ==== A Bulge may collapse because of vigilant
293 * . deflation or destructive underflow. In the
294 * . underflow case, try the two-small-subdiagonals
295 * . trick to try to reinflate the bulge. ====
296 *
297 IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
298 $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
299 *
300 * ==== Typical case: not collapsed (yet). ====
301 *
302 H( K+1, K ) = BETA
303 H( K+2, K ) = ZERO
304 H( K+3, K ) = ZERO
305 ELSE
306 *
307 * ==== Atypical case: collapsed. Attempt to
308 * . reintroduce ignoring H(K+1,K) and H(K+2,K).
309 * . If the fill resulting from the new
310 * . reflector is too large, then abandon it.
311 * . Otherwise, use the new one. ====
312 *
313 CALL DLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ),
314 $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
315 $ VT )
316 ALPHA = VT( 1 )
317 CALL DLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
318 REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )*
319 $ H( K+2, K ) )
320 *
321 IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+
322 $ ABS( REFSUM*VT( 3 ) ).GT.ULP*
323 $ ( ABS( H( K, K ) )+ABS( H( K+1,
324 $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN
325 *
326 * ==== Starting a new bulge here would
327 * . create non-negligible fill. Use
328 * . the old one with trepidation. ====
329 *
330 H( K+1, K ) = BETA
331 H( K+2, K ) = ZERO
332 H( K+3, K ) = ZERO
333 ELSE
334 *
335 * ==== Stating a new bulge here would
336 * . create only negligible fill.
337 * . Replace the old reflector with
338 * . the new one. ====
339 *
340 H( K+1, K ) = H( K+1, K ) - REFSUM
341 H( K+2, K ) = ZERO
342 H( K+3, K ) = ZERO
343 V( 1, M ) = VT( 1 )
344 V( 2, M ) = VT( 2 )
345 V( 3, M ) = VT( 3 )
346 END IF
347 END IF
348 END IF
349 20 CONTINUE
350 *
351 * ==== Generate a 2-by-2 reflection, if needed. ====
352 *
353 K = KRCOL + 3*( M22-1 )
354 IF( BMP22 ) THEN
355 IF( K.EQ.KTOP-1 ) THEN
356 CALL DLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ),
357 $ SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ),
358 $ V( 1, M22 ) )
359 BETA = V( 1, M22 )
360 CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
361 ELSE
362 BETA = H( K+1, K )
363 V( 2, M22 ) = H( K+2, K )
364 CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
365 H( K+1, K ) = BETA
366 H( K+2, K ) = ZERO
367 END IF
368 END IF
369 *
370 * ==== Multiply H by reflections from the left ====
371 *
372 IF( ACCUM ) THEN
373 JBOT = MIN( NDCOL, KBOT )
374 ELSE IF( WANTT ) THEN
375 JBOT = N
376 ELSE
377 JBOT = KBOT
378 END IF
379 DO 40 J = MAX( KTOP, KRCOL ), JBOT
380 MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 )
381 DO 30 M = MTOP, MEND
382 K = KRCOL + 3*( M-1 )
383 REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )*
384 $ H( K+2, J )+V( 3, M )*H( K+3, J ) )
385 H( K+1, J ) = H( K+1, J ) - REFSUM
386 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
387 H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
388 30 CONTINUE
389 40 CONTINUE
390 IF( BMP22 ) THEN
391 K = KRCOL + 3*( M22-1 )
392 DO 50 J = MAX( K+1, KTOP ), JBOT
393 REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )*
394 $ H( K+2, J ) )
395 H( K+1, J ) = H( K+1, J ) - REFSUM
396 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
397 50 CONTINUE
398 END IF
399 *
400 * ==== Multiply H by reflections from the right.
401 * . Delay filling in the last row until the
402 * . vigilant deflation check is complete. ====
403 *
404 IF( ACCUM ) THEN
405 JTOP = MAX( KTOP, INCOL )
406 ELSE IF( WANTT ) THEN
407 JTOP = 1
408 ELSE
409 JTOP = KTOP
410 END IF
411 DO 90 M = MTOP, MBOT
412 IF( V( 1, M ).NE.ZERO ) THEN
413 K = KRCOL + 3*( M-1 )
414 DO 60 J = JTOP, MIN( KBOT, K+3 )
415 REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
416 $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
417 H( J, K+1 ) = H( J, K+1 ) - REFSUM
418 H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M )
419 H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M )
420 60 CONTINUE
421 *
422 IF( ACCUM ) THEN
423 *
424 * ==== Accumulate U. (If necessary, update Z later
425 * . with with an efficient matrix-matrix
426 * . multiply.) ====
427 *
428 KMS = K - INCOL
429 DO 70 J = MAX( 1, KTOP-INCOL ), KDU
430 REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
431 $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
432 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
433 U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M )
434 U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M )
435 70 CONTINUE
436 ELSE IF( WANTZ ) THEN
437 *
438 * ==== U is not accumulated, so update Z
439 * . now by multiplying by reflections
440 * . from the right. ====
441 *
442 DO 80 J = ILOZ, IHIZ
443 REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
444 $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
445 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
446 Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M )
447 Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M )
448 80 CONTINUE
449 END IF
450 END IF
451 90 CONTINUE
452 *
453 * ==== Special case: 2-by-2 reflection (if needed) ====
454 *
455 K = KRCOL + 3*( M22-1 )
456 IF( BMP22 ) THEN
457 IF ( V( 1, M22 ).NE.ZERO ) THEN
458 DO 100 J = JTOP, MIN( KBOT, K+3 )
459 REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
460 $ H( J, K+2 ) )
461 H( J, K+1 ) = H( J, K+1 ) - REFSUM
462 H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 )
463 100 CONTINUE
464 *
465 IF( ACCUM ) THEN
466 KMS = K - INCOL
467 DO 110 J = MAX( 1, KTOP-INCOL ), KDU
468 REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
469 $ V( 2, M22 )*U( J, KMS+2 ) )
470 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
471 U( J, KMS+2 ) = U( J, KMS+2 ) -
472 $ REFSUM*V( 2, M22 )
473 110 CONTINUE
474 ELSE IF( WANTZ ) THEN
475 DO 120 J = ILOZ, IHIZ
476 REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
477 $ Z( J, K+2 ) )
478 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
479 Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 )
480 120 CONTINUE
481 END IF
482 END IF
483 END IF
484 *
485 * ==== Vigilant deflation check ====
486 *
487 MSTART = MTOP
488 IF( KRCOL+3*( MSTART-1 ).LT.KTOP )
489 $ MSTART = MSTART + 1
490 MEND = MBOT
491 IF( BMP22 )
492 $ MEND = MEND + 1
493 IF( KRCOL.EQ.KBOT-2 )
494 $ MEND = MEND + 1
495 DO 130 M = MSTART, MEND
496 K = MIN( KBOT-1, KRCOL+3*( M-1 ) )
497 *
498 * ==== The following convergence test requires that
499 * . the tradition small-compared-to-nearby-diagonals
500 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
501 * . criteria both be satisfied. The latter improves
502 * . accuracy in some examples. Falling back on an
503 * . alternate convergence criterion when TST1 or TST2
504 * . is zero (as done here) is traditional but probably
505 * . unnecessary. ====
506 *
507 IF( H( K+1, K ).NE.ZERO ) THEN
508 TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) )
509 IF( TST1.EQ.ZERO ) THEN
510 IF( K.GE.KTOP+1 )
511 $ TST1 = TST1 + ABS( H( K, K-1 ) )
512 IF( K.GE.KTOP+2 )
513 $ TST1 = TST1 + ABS( H( K, K-2 ) )
514 IF( K.GE.KTOP+3 )
515 $ TST1 = TST1 + ABS( H( K, K-3 ) )
516 IF( K.LE.KBOT-2 )
517 $ TST1 = TST1 + ABS( H( K+2, K+1 ) )
518 IF( K.LE.KBOT-3 )
519 $ TST1 = TST1 + ABS( H( K+3, K+1 ) )
520 IF( K.LE.KBOT-4 )
521 $ TST1 = TST1 + ABS( H( K+4, K+1 ) )
522 END IF
523 IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
524 $ THEN
525 H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
526 H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
527 H11 = MAX( ABS( H( K+1, K+1 ) ),
528 $ ABS( H( K, K )-H( K+1, K+1 ) ) )
529 H22 = MIN( ABS( H( K+1, K+1 ) ),
530 $ ABS( H( K, K )-H( K+1, K+1 ) ) )
531 SCL = H11 + H12
532 TST2 = H22*( H11 / SCL )
533 *
534 IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE.
535 $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
536 END IF
537 END IF
538 130 CONTINUE
539 *
540 * ==== Fill in the last row of each bulge. ====
541 *
542 MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 )
543 DO 140 M = MTOP, MEND
544 K = KRCOL + 3*( M-1 )
545 REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 )
546 H( K+4, K+1 ) = -REFSUM
547 H( K+4, K+2 ) = -REFSUM*V( 2, M )
548 H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M )
549 140 CONTINUE
550 *
551 * ==== End of near-the-diagonal bulge chase. ====
552 *
553 150 CONTINUE
554 *
555 * ==== Use U (if accumulated) to update far-from-diagonal
556 * . entries in H. If required, use U to update Z as
557 * . well. ====
558 *
559 IF( ACCUM ) THEN
560 IF( WANTT ) THEN
561 JTOP = 1
562 JBOT = N
563 ELSE
564 JTOP = KTOP
565 JBOT = KBOT
566 END IF
567 IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR.
568 $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN
569 *
570 * ==== Updates not exploiting the 2-by-2 block
571 * . structure of U. K1 and NU keep track of
572 * . the location and size of U in the special
573 * . cases of introducing bulges and chasing
574 * . bulges off the bottom. In these special
575 * . cases and in case the number of shifts
576 * . is NS = 2, there is no 2-by-2 block
577 * . structure to exploit. ====
578 *
579 K1 = MAX( 1, KTOP-INCOL )
580 NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
581 *
582 * ==== Horizontal Multiply ====
583 *
584 DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
585 JLEN = MIN( NH, JBOT-JCOL+1 )
586 CALL DGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
587 $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
588 $ LDWH )
589 CALL DLACPY( 'ALL', NU, JLEN, WH, LDWH,
590 $ H( INCOL+K1, JCOL ), LDH )
591 160 CONTINUE
592 *
593 * ==== Vertical multiply ====
594 *
595 DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
596 JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
597 CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
598 $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
599 $ LDU, ZERO, WV, LDWV )
600 CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
601 $ H( JROW, INCOL+K1 ), LDH )
602 170 CONTINUE
603 *
604 * ==== Z multiply (also vertical) ====
605 *
606 IF( WANTZ ) THEN
607 DO 180 JROW = ILOZ, IHIZ, NV
608 JLEN = MIN( NV, IHIZ-JROW+1 )
609 CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
610 $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
611 $ LDU, ZERO, WV, LDWV )
612 CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
613 $ Z( JROW, INCOL+K1 ), LDZ )
614 180 CONTINUE
615 END IF
616 ELSE
617 *
618 * ==== Updates exploiting U's 2-by-2 block structure.
619 * . (I2, I4, J2, J4 are the last rows and columns
620 * . of the blocks.) ====
621 *
622 I2 = ( KDU+1 ) / 2
623 I4 = KDU
624 J2 = I4 - I2
625 J4 = KDU
626 *
627 * ==== KZS and KNZ deal with the band of zeros
628 * . along the diagonal of one of the triangular
629 * . blocks. ====
630 *
631 KZS = ( J4-J2 ) - ( NS+1 )
632 KNZ = NS + 1
633 *
634 * ==== Horizontal multiply ====
635 *
636 DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
637 JLEN = MIN( NH, JBOT-JCOL+1 )
638 *
639 * ==== Copy bottom of H to top+KZS of scratch ====
640 * (The first KZS rows get multiplied by zero.) ====
641 *
642 CALL DLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ),
643 $ LDH, WH( KZS+1, 1 ), LDWH )
644 *
645 * ==== Multiply by U21**T ====
646 *
647 CALL DLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH )
648 CALL DTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE,
649 $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ),
650 $ LDWH )
651 *
652 * ==== Multiply top of H by U11**T ====
653 *
654 CALL DGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU,
655 $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH )
656 *
657 * ==== Copy top of H to bottom of WH ====
658 *
659 CALL DLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH,
660 $ WH( I2+1, 1 ), LDWH )
661 *
662 * ==== Multiply by U21**T ====
663 *
664 CALL DTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE,
665 $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH )
666 *
667 * ==== Multiply by U22 ====
668 *
669 CALL DGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE,
670 $ U( J2+1, I2+1 ), LDU,
671 $ H( INCOL+1+J2, JCOL ), LDH, ONE,
672 $ WH( I2+1, 1 ), LDWH )
673 *
674 * ==== Copy it back ====
675 *
676 CALL DLACPY( 'ALL', KDU, JLEN, WH, LDWH,
677 $ H( INCOL+1, JCOL ), LDH )
678 190 CONTINUE
679 *
680 * ==== Vertical multiply ====
681 *
682 DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV
683 JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW )
684 *
685 * ==== Copy right of H to scratch (the first KZS
686 * . columns get multiplied by zero) ====
687 *
688 CALL DLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ),
689 $ LDH, WV( 1, 1+KZS ), LDWV )
690 *
691 * ==== Multiply by U21 ====
692 *
693 CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV )
694 CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
695 $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
696 $ LDWV )
697 *
698 * ==== Multiply by U11 ====
699 *
700 CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
701 $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV,
702 $ LDWV )
703 *
704 * ==== Copy left of H to right of scratch ====
705 *
706 CALL DLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH,
707 $ WV( 1, 1+I2 ), LDWV )
708 *
709 * ==== Multiply by U21 ====
710 *
711 CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
712 $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV )
713 *
714 * ==== Multiply by U22 ====
715 *
716 CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
717 $ H( JROW, INCOL+1+J2 ), LDH,
718 $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ),
719 $ LDWV )
720 *
721 * ==== Copy it back ====
722 *
723 CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
724 $ H( JROW, INCOL+1 ), LDH )
725 200 CONTINUE
726 *
727 * ==== Multiply Z (also vertical) ====
728 *
729 IF( WANTZ ) THEN
730 DO 210 JROW = ILOZ, IHIZ, NV
731 JLEN = MIN( NV, IHIZ-JROW+1 )
732 *
733 * ==== Copy right of Z to left of scratch (first
734 * . KZS columns get multiplied by zero) ====
735 *
736 CALL DLACPY( 'ALL', JLEN, KNZ,
737 $ Z( JROW, INCOL+1+J2 ), LDZ,
738 $ WV( 1, 1+KZS ), LDWV )
739 *
740 * ==== Multiply by U12 ====
741 *
742 CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV,
743 $ LDWV )
744 CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
745 $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
746 $ LDWV )
747 *
748 * ==== Multiply by U11 ====
749 *
750 CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
751 $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE,
752 $ WV, LDWV )
753 *
754 * ==== Copy left of Z to right of scratch ====
755 *
756 CALL DLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ),
757 $ LDZ, WV( 1, 1+I2 ), LDWV )
758 *
759 * ==== Multiply by U21 ====
760 *
761 CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
762 $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ),
763 $ LDWV )
764 *
765 * ==== Multiply by U22 ====
766 *
767 CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
768 $ Z( JROW, INCOL+1+J2 ), LDZ,
769 $ U( J2+1, I2+1 ), LDU, ONE,
770 $ WV( 1, 1+I2 ), LDWV )
771 *
772 * ==== Copy the result back to Z ====
773 *
774 CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
775 $ Z( JROW, INCOL+1 ), LDZ )
776 210 CONTINUE
777 END IF
778 END IF
779 END IF
780 220 CONTINUE
781 *
782 * ==== End of DLAQR5 ====
783 *
784 END
2 $ SR, SI, H, LDH, ILOZ, IHIZ, Z, LDZ, V, LDV, U,
3 $ LDU, NV, WV, LDWV, NH, WH, LDWH )
4 *
5 * -- LAPACK auxiliary routine (version 3.3.0) --
6 * -- LAPACK is a software package provided by Univ. of Tennessee, --
7 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
8 * November 2010
9 *
10 * .. Scalar Arguments ..
11 INTEGER IHIZ, ILOZ, KACC22, KBOT, KTOP, LDH, LDU, LDV,
12 $ LDWH, LDWV, LDZ, N, NH, NSHFTS, NV
13 LOGICAL WANTT, WANTZ
14 * ..
15 * .. Array Arguments ..
16 DOUBLE PRECISION H( LDH, * ), SI( * ), SR( * ), U( LDU, * ),
17 $ V( LDV, * ), WH( LDWH, * ), WV( LDWV, * ),
18 $ Z( LDZ, * )
19 * ..
20 *
21 * This auxiliary subroutine called by DLAQR0 performs a
22 * single small-bulge multi-shift QR sweep.
23 *
24 * WANTT (input) logical scalar
25 * WANTT = .true. if the quasi-triangular Schur factor
26 * is being computed. WANTT is set to .false. otherwise.
27 *
28 * WANTZ (input) logical scalar
29 * WANTZ = .true. if the orthogonal Schur factor is being
30 * computed. WANTZ is set to .false. otherwise.
31 *
32 * KACC22 (input) integer with value 0, 1, or 2.
33 * Specifies the computation mode of far-from-diagonal
34 * orthogonal updates.
35 * = 0: DLAQR5 does not accumulate reflections and does not
36 * use matrix-matrix multiply to update far-from-diagonal
37 * matrix entries.
38 * = 1: DLAQR5 accumulates reflections and uses matrix-matrix
39 * multiply to update the far-from-diagonal matrix entries.
40 * = 2: DLAQR5 accumulates reflections, uses matrix-matrix
41 * multiply to update the far-from-diagonal matrix entries,
42 * and takes advantage of 2-by-2 block structure during
43 * matrix multiplies.
44 *
45 * N (input) integer scalar
46 * N is the order of the Hessenberg matrix H upon which this
47 * subroutine operates.
48 *
49 * KTOP (input) integer scalar
50 * KBOT (input) integer scalar
51 * These are the first and last rows and columns of an
52 * isolated diagonal block upon which the QR sweep is to be
53 * applied. It is assumed without a check that
54 * either KTOP = 1 or H(KTOP,KTOP-1) = 0
55 * and
56 * either KBOT = N or H(KBOT+1,KBOT) = 0.
57 *
58 * NSHFTS (input) integer scalar
59 * NSHFTS gives the number of simultaneous shifts. NSHFTS
60 * must be positive and even.
61 *
62 * SR (input/output) DOUBLE PRECISION array of size (NSHFTS)
63 * SI (input/output) DOUBLE PRECISION array of size (NSHFTS)
64 * SR contains the real parts and SI contains the imaginary
65 * parts of the NSHFTS shifts of origin that define the
66 * multi-shift QR sweep. On output SR and SI may be
67 * reordered.
68 *
69 * H (input/output) DOUBLE PRECISION array of size (LDH,N)
70 * On input H contains a Hessenberg matrix. On output a
71 * multi-shift QR sweep with shifts SR(J)+i*SI(J) is applied
72 * to the isolated diagonal block in rows and columns KTOP
73 * through KBOT.
74 *
75 * LDH (input) integer scalar
76 * LDH is the leading dimension of H just as declared in the
77 * calling procedure. LDH.GE.MAX(1,N).
78 *
79 * ILOZ (input) INTEGER
80 * IHIZ (input) INTEGER
81 * Specify the rows of Z to which transformations must be
82 * applied if WANTZ is .TRUE.. 1 .LE. ILOZ .LE. IHIZ .LE. N
83 *
84 * Z (input/output) DOUBLE PRECISION array of size (LDZ,IHI)
85 * If WANTZ = .TRUE., then the QR Sweep orthogonal
86 * similarity transformation is accumulated into
87 * Z(ILOZ:IHIZ,ILO:IHI) from the right.
88 * If WANTZ = .FALSE., then Z is unreferenced.
89 *
90 * LDZ (input) integer scalar
91 * LDA is the leading dimension of Z just as declared in
92 * the calling procedure. LDZ.GE.N.
93 *
94 * V (workspace) DOUBLE PRECISION array of size (LDV,NSHFTS/2)
95 *
96 * LDV (input) integer scalar
97 * LDV is the leading dimension of V as declared in the
98 * calling procedure. LDV.GE.3.
99 *
100 * U (workspace) DOUBLE PRECISION array of size
101 * (LDU,3*NSHFTS-3)
102 *
103 * LDU (input) integer scalar
104 * LDU is the leading dimension of U just as declared in the
105 * in the calling subroutine. LDU.GE.3*NSHFTS-3.
106 *
107 * NH (input) integer scalar
108 * NH is the number of columns in array WH available for
109 * workspace. NH.GE.1.
110 *
111 * WH (workspace) DOUBLE PRECISION array of size (LDWH,NH)
112 *
113 * LDWH (input) integer scalar
114 * Leading dimension of WH just as declared in the
115 * calling procedure. LDWH.GE.3*NSHFTS-3.
116 *
117 * NV (input) integer scalar
118 * NV is the number of rows in WV agailable for workspace.
119 * NV.GE.1.
120 *
121 * WV (workspace) DOUBLE PRECISION array of size
122 * (LDWV,3*NSHFTS-3)
123 *
124 * LDWV (input) integer scalar
125 * LDWV is the leading dimension of WV as declared in the
126 * in the calling subroutine. LDWV.GE.NV.
127 *
128 * ================================================================
129 * Based on contributions by
130 * Karen Braman and Ralph Byers, Department of Mathematics,
131 * University of Kansas, USA
132 *
133 * ================================================================
134 * Reference:
135 *
136 * K. Braman, R. Byers and R. Mathias, The Multi-Shift QR
137 * Algorithm Part I: Maintaining Well Focused Shifts, and
138 * Level 3 Performance, SIAM Journal of Matrix Analysis,
139 * volume 23, pages 929--947, 2002.
140 *
141 * ================================================================
142 * .. Parameters ..
143 DOUBLE PRECISION ZERO, ONE
144 PARAMETER ( ZERO = 0.0d0, ONE = 1.0d0 )
145 * ..
146 * .. Local Scalars ..
147 DOUBLE PRECISION ALPHA, BETA, H11, H12, H21, H22, REFSUM,
148 $ SAFMAX, SAFMIN, SCL, SMLNUM, SWAP, TST1, TST2,
149 $ ULP
150 INTEGER I, I2, I4, INCOL, J, J2, J4, JBOT, JCOL, JLEN,
151 $ JROW, JTOP, K, K1, KDU, KMS, KNZ, KRCOL, KZS,
152 $ M, M22, MBOT, MEND, MSTART, MTOP, NBMPS, NDCOL,
153 $ NS, NU
154 LOGICAL ACCUM, BLK22, BMP22
155 * ..
156 * .. External Functions ..
157 DOUBLE PRECISION DLAMCH
158 EXTERNAL DLAMCH
159 * ..
160 * .. Intrinsic Functions ..
161 *
162 INTRINSIC ABS, DBLE, MAX, MIN, MOD
163 * ..
164 * .. Local Arrays ..
165 DOUBLE PRECISION VT( 3 )
166 * ..
167 * .. External Subroutines ..
168 EXTERNAL DGEMM, DLABAD, DLACPY, DLAQR1, DLARFG, DLASET,
169 $ DTRMM
170 * ..
171 * .. Executable Statements ..
172 *
173 * ==== If there are no shifts, then there is nothing to do. ====
174 *
175 IF( NSHFTS.LT.2 )
176 $ RETURN
177 *
178 * ==== If the active block is empty or 1-by-1, then there
179 * . is nothing to do. ====
180 *
181 IF( KTOP.GE.KBOT )
182 $ RETURN
183 *
184 * ==== Shuffle shifts into pairs of real shifts and pairs
185 * . of complex conjugate shifts assuming complex
186 * . conjugate shifts are already adjacent to one
187 * . another. ====
188 *
189 DO 10 I = 1, NSHFTS - 2, 2
190 IF( SI( I ).NE.-SI( I+1 ) ) THEN
191 *
192 SWAP = SR( I )
193 SR( I ) = SR( I+1 )
194 SR( I+1 ) = SR( I+2 )
195 SR( I+2 ) = SWAP
196 *
197 SWAP = SI( I )
198 SI( I ) = SI( I+1 )
199 SI( I+1 ) = SI( I+2 )
200 SI( I+2 ) = SWAP
201 END IF
202 10 CONTINUE
203 *
204 * ==== NSHFTS is supposed to be even, but if it is odd,
205 * . then simply reduce it by one. The shuffle above
206 * . ensures that the dropped shift is real and that
207 * . the remaining shifts are paired. ====
208 *
209 NS = NSHFTS - MOD( NSHFTS, 2 )
210 *
211 * ==== Machine constants for deflation ====
212 *
213 SAFMIN = DLAMCH( 'SAFE MINIMUM' )
214 SAFMAX = ONE / SAFMIN
215 CALL DLABAD( SAFMIN, SAFMAX )
216 ULP = DLAMCH( 'PRECISION' )
217 SMLNUM = SAFMIN*( DBLE( N ) / ULP )
218 *
219 * ==== Use accumulated reflections to update far-from-diagonal
220 * . entries ? ====
221 *
222 ACCUM = ( KACC22.EQ.1 ) .OR. ( KACC22.EQ.2 )
223 *
224 * ==== If so, exploit the 2-by-2 block structure? ====
225 *
226 BLK22 = ( NS.GT.2 ) .AND. ( KACC22.EQ.2 )
227 *
228 * ==== clear trash ====
229 *
230 IF( KTOP+2.LE.KBOT )
231 $ H( KTOP+2, KTOP ) = ZERO
232 *
233 * ==== NBMPS = number of 2-shift bulges in the chain ====
234 *
235 NBMPS = NS / 2
236 *
237 * ==== KDU = width of slab ====
238 *
239 KDU = 6*NBMPS - 3
240 *
241 * ==== Create and chase chains of NBMPS bulges ====
242 *
243 DO 220 INCOL = 3*( 1-NBMPS ) + KTOP - 1, KBOT - 2, 3*NBMPS - 2
244 NDCOL = INCOL + KDU
245 IF( ACCUM )
246 $ CALL DLASET( 'ALL', KDU, KDU, ZERO, ONE, U, LDU )
247 *
248 * ==== Near-the-diagonal bulge chase. The following loop
249 * . performs the near-the-diagonal part of a small bulge
250 * . multi-shift QR sweep. Each 6*NBMPS-2 column diagonal
251 * . chunk extends from column INCOL to column NDCOL
252 * . (including both column INCOL and column NDCOL). The
253 * . following loop chases a 3*NBMPS column long chain of
254 * . NBMPS bulges 3*NBMPS-2 columns to the right. (INCOL
255 * . may be less than KTOP and and NDCOL may be greater than
256 * . KBOT indicating phantom columns from which to chase
257 * . bulges before they are actually introduced or to which
258 * . to chase bulges beyond column KBOT.) ====
259 *
260 DO 150 KRCOL = INCOL, MIN( INCOL+3*NBMPS-3, KBOT-2 )
261 *
262 * ==== Bulges number MTOP to MBOT are active double implicit
263 * . shift bulges. There may or may not also be small
264 * . 2-by-2 bulge, if there is room. The inactive bulges
265 * . (if any) must wait until the active bulges have moved
266 * . down the diagonal to make room. The phantom matrix
267 * . paradigm described above helps keep track. ====
268 *
269 MTOP = MAX( 1, ( ( KTOP-1 )-KRCOL+2 ) / 3+1 )
270 MBOT = MIN( NBMPS, ( KBOT-KRCOL ) / 3 )
271 M22 = MBOT + 1
272 BMP22 = ( MBOT.LT.NBMPS ) .AND. ( KRCOL+3*( M22-1 ) ).EQ.
273 $ ( KBOT-2 )
274 *
275 * ==== Generate reflections to chase the chain right
276 * . one column. (The minimum value of K is KTOP-1.) ====
277 *
278 DO 20 M = MTOP, MBOT
279 K = KRCOL + 3*( M-1 )
280 IF( K.EQ.KTOP-1 ) THEN
281 CALL DLAQR1( 3, H( KTOP, KTOP ), LDH, SR( 2*M-1 ),
282 $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
283 $ V( 1, M ) )
284 ALPHA = V( 1, M )
285 CALL DLARFG( 3, ALPHA, V( 2, M ), 1, V( 1, M ) )
286 ELSE
287 BETA = H( K+1, K )
288 V( 2, M ) = H( K+2, K )
289 V( 3, M ) = H( K+3, K )
290 CALL DLARFG( 3, BETA, V( 2, M ), 1, V( 1, M ) )
291 *
292 * ==== A Bulge may collapse because of vigilant
293 * . deflation or destructive underflow. In the
294 * . underflow case, try the two-small-subdiagonals
295 * . trick to try to reinflate the bulge. ====
296 *
297 IF( H( K+3, K ).NE.ZERO .OR. H( K+3, K+1 ).NE.
298 $ ZERO .OR. H( K+3, K+2 ).EQ.ZERO ) THEN
299 *
300 * ==== Typical case: not collapsed (yet). ====
301 *
302 H( K+1, K ) = BETA
303 H( K+2, K ) = ZERO
304 H( K+3, K ) = ZERO
305 ELSE
306 *
307 * ==== Atypical case: collapsed. Attempt to
308 * . reintroduce ignoring H(K+1,K) and H(K+2,K).
309 * . If the fill resulting from the new
310 * . reflector is too large, then abandon it.
311 * . Otherwise, use the new one. ====
312 *
313 CALL DLAQR1( 3, H( K+1, K+1 ), LDH, SR( 2*M-1 ),
314 $ SI( 2*M-1 ), SR( 2*M ), SI( 2*M ),
315 $ VT )
316 ALPHA = VT( 1 )
317 CALL DLARFG( 3, ALPHA, VT( 2 ), 1, VT( 1 ) )
318 REFSUM = VT( 1 )*( H( K+1, K )+VT( 2 )*
319 $ H( K+2, K ) )
320 *
321 IF( ABS( H( K+2, K )-REFSUM*VT( 2 ) )+
322 $ ABS( REFSUM*VT( 3 ) ).GT.ULP*
323 $ ( ABS( H( K, K ) )+ABS( H( K+1,
324 $ K+1 ) )+ABS( H( K+2, K+2 ) ) ) ) THEN
325 *
326 * ==== Starting a new bulge here would
327 * . create non-negligible fill. Use
328 * . the old one with trepidation. ====
329 *
330 H( K+1, K ) = BETA
331 H( K+2, K ) = ZERO
332 H( K+3, K ) = ZERO
333 ELSE
334 *
335 * ==== Stating a new bulge here would
336 * . create only negligible fill.
337 * . Replace the old reflector with
338 * . the new one. ====
339 *
340 H( K+1, K ) = H( K+1, K ) - REFSUM
341 H( K+2, K ) = ZERO
342 H( K+3, K ) = ZERO
343 V( 1, M ) = VT( 1 )
344 V( 2, M ) = VT( 2 )
345 V( 3, M ) = VT( 3 )
346 END IF
347 END IF
348 END IF
349 20 CONTINUE
350 *
351 * ==== Generate a 2-by-2 reflection, if needed. ====
352 *
353 K = KRCOL + 3*( M22-1 )
354 IF( BMP22 ) THEN
355 IF( K.EQ.KTOP-1 ) THEN
356 CALL DLAQR1( 2, H( K+1, K+1 ), LDH, SR( 2*M22-1 ),
357 $ SI( 2*M22-1 ), SR( 2*M22 ), SI( 2*M22 ),
358 $ V( 1, M22 ) )
359 BETA = V( 1, M22 )
360 CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
361 ELSE
362 BETA = H( K+1, K )
363 V( 2, M22 ) = H( K+2, K )
364 CALL DLARFG( 2, BETA, V( 2, M22 ), 1, V( 1, M22 ) )
365 H( K+1, K ) = BETA
366 H( K+2, K ) = ZERO
367 END IF
368 END IF
369 *
370 * ==== Multiply H by reflections from the left ====
371 *
372 IF( ACCUM ) THEN
373 JBOT = MIN( NDCOL, KBOT )
374 ELSE IF( WANTT ) THEN
375 JBOT = N
376 ELSE
377 JBOT = KBOT
378 END IF
379 DO 40 J = MAX( KTOP, KRCOL ), JBOT
380 MEND = MIN( MBOT, ( J-KRCOL+2 ) / 3 )
381 DO 30 M = MTOP, MEND
382 K = KRCOL + 3*( M-1 )
383 REFSUM = V( 1, M )*( H( K+1, J )+V( 2, M )*
384 $ H( K+2, J )+V( 3, M )*H( K+3, J ) )
385 H( K+1, J ) = H( K+1, J ) - REFSUM
386 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M )
387 H( K+3, J ) = H( K+3, J ) - REFSUM*V( 3, M )
388 30 CONTINUE
389 40 CONTINUE
390 IF( BMP22 ) THEN
391 K = KRCOL + 3*( M22-1 )
392 DO 50 J = MAX( K+1, KTOP ), JBOT
393 REFSUM = V( 1, M22 )*( H( K+1, J )+V( 2, M22 )*
394 $ H( K+2, J ) )
395 H( K+1, J ) = H( K+1, J ) - REFSUM
396 H( K+2, J ) = H( K+2, J ) - REFSUM*V( 2, M22 )
397 50 CONTINUE
398 END IF
399 *
400 * ==== Multiply H by reflections from the right.
401 * . Delay filling in the last row until the
402 * . vigilant deflation check is complete. ====
403 *
404 IF( ACCUM ) THEN
405 JTOP = MAX( KTOP, INCOL )
406 ELSE IF( WANTT ) THEN
407 JTOP = 1
408 ELSE
409 JTOP = KTOP
410 END IF
411 DO 90 M = MTOP, MBOT
412 IF( V( 1, M ).NE.ZERO ) THEN
413 K = KRCOL + 3*( M-1 )
414 DO 60 J = JTOP, MIN( KBOT, K+3 )
415 REFSUM = V( 1, M )*( H( J, K+1 )+V( 2, M )*
416 $ H( J, K+2 )+V( 3, M )*H( J, K+3 ) )
417 H( J, K+1 ) = H( J, K+1 ) - REFSUM
418 H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M )
419 H( J, K+3 ) = H( J, K+3 ) - REFSUM*V( 3, M )
420 60 CONTINUE
421 *
422 IF( ACCUM ) THEN
423 *
424 * ==== Accumulate U. (If necessary, update Z later
425 * . with with an efficient matrix-matrix
426 * . multiply.) ====
427 *
428 KMS = K - INCOL
429 DO 70 J = MAX( 1, KTOP-INCOL ), KDU
430 REFSUM = V( 1, M )*( U( J, KMS+1 )+V( 2, M )*
431 $ U( J, KMS+2 )+V( 3, M )*U( J, KMS+3 ) )
432 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
433 U( J, KMS+2 ) = U( J, KMS+2 ) - REFSUM*V( 2, M )
434 U( J, KMS+3 ) = U( J, KMS+3 ) - REFSUM*V( 3, M )
435 70 CONTINUE
436 ELSE IF( WANTZ ) THEN
437 *
438 * ==== U is not accumulated, so update Z
439 * . now by multiplying by reflections
440 * . from the right. ====
441 *
442 DO 80 J = ILOZ, IHIZ
443 REFSUM = V( 1, M )*( Z( J, K+1 )+V( 2, M )*
444 $ Z( J, K+2 )+V( 3, M )*Z( J, K+3 ) )
445 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
446 Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M )
447 Z( J, K+3 ) = Z( J, K+3 ) - REFSUM*V( 3, M )
448 80 CONTINUE
449 END IF
450 END IF
451 90 CONTINUE
452 *
453 * ==== Special case: 2-by-2 reflection (if needed) ====
454 *
455 K = KRCOL + 3*( M22-1 )
456 IF( BMP22 ) THEN
457 IF ( V( 1, M22 ).NE.ZERO ) THEN
458 DO 100 J = JTOP, MIN( KBOT, K+3 )
459 REFSUM = V( 1, M22 )*( H( J, K+1 )+V( 2, M22 )*
460 $ H( J, K+2 ) )
461 H( J, K+1 ) = H( J, K+1 ) - REFSUM
462 H( J, K+2 ) = H( J, K+2 ) - REFSUM*V( 2, M22 )
463 100 CONTINUE
464 *
465 IF( ACCUM ) THEN
466 KMS = K - INCOL
467 DO 110 J = MAX( 1, KTOP-INCOL ), KDU
468 REFSUM = V( 1, M22 )*( U( J, KMS+1 )+
469 $ V( 2, M22 )*U( J, KMS+2 ) )
470 U( J, KMS+1 ) = U( J, KMS+1 ) - REFSUM
471 U( J, KMS+2 ) = U( J, KMS+2 ) -
472 $ REFSUM*V( 2, M22 )
473 110 CONTINUE
474 ELSE IF( WANTZ ) THEN
475 DO 120 J = ILOZ, IHIZ
476 REFSUM = V( 1, M22 )*( Z( J, K+1 )+V( 2, M22 )*
477 $ Z( J, K+2 ) )
478 Z( J, K+1 ) = Z( J, K+1 ) - REFSUM
479 Z( J, K+2 ) = Z( J, K+2 ) - REFSUM*V( 2, M22 )
480 120 CONTINUE
481 END IF
482 END IF
483 END IF
484 *
485 * ==== Vigilant deflation check ====
486 *
487 MSTART = MTOP
488 IF( KRCOL+3*( MSTART-1 ).LT.KTOP )
489 $ MSTART = MSTART + 1
490 MEND = MBOT
491 IF( BMP22 )
492 $ MEND = MEND + 1
493 IF( KRCOL.EQ.KBOT-2 )
494 $ MEND = MEND + 1
495 DO 130 M = MSTART, MEND
496 K = MIN( KBOT-1, KRCOL+3*( M-1 ) )
497 *
498 * ==== The following convergence test requires that
499 * . the tradition small-compared-to-nearby-diagonals
500 * . criterion and the Ahues & Tisseur (LAWN 122, 1997)
501 * . criteria both be satisfied. The latter improves
502 * . accuracy in some examples. Falling back on an
503 * . alternate convergence criterion when TST1 or TST2
504 * . is zero (as done here) is traditional but probably
505 * . unnecessary. ====
506 *
507 IF( H( K+1, K ).NE.ZERO ) THEN
508 TST1 = ABS( H( K, K ) ) + ABS( H( K+1, K+1 ) )
509 IF( TST1.EQ.ZERO ) THEN
510 IF( K.GE.KTOP+1 )
511 $ TST1 = TST1 + ABS( H( K, K-1 ) )
512 IF( K.GE.KTOP+2 )
513 $ TST1 = TST1 + ABS( H( K, K-2 ) )
514 IF( K.GE.KTOP+3 )
515 $ TST1 = TST1 + ABS( H( K, K-3 ) )
516 IF( K.LE.KBOT-2 )
517 $ TST1 = TST1 + ABS( H( K+2, K+1 ) )
518 IF( K.LE.KBOT-3 )
519 $ TST1 = TST1 + ABS( H( K+3, K+1 ) )
520 IF( K.LE.KBOT-4 )
521 $ TST1 = TST1 + ABS( H( K+4, K+1 ) )
522 END IF
523 IF( ABS( H( K+1, K ) ).LE.MAX( SMLNUM, ULP*TST1 ) )
524 $ THEN
525 H12 = MAX( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
526 H21 = MIN( ABS( H( K+1, K ) ), ABS( H( K, K+1 ) ) )
527 H11 = MAX( ABS( H( K+1, K+1 ) ),
528 $ ABS( H( K, K )-H( K+1, K+1 ) ) )
529 H22 = MIN( ABS( H( K+1, K+1 ) ),
530 $ ABS( H( K, K )-H( K+1, K+1 ) ) )
531 SCL = H11 + H12
532 TST2 = H22*( H11 / SCL )
533 *
534 IF( TST2.EQ.ZERO .OR. H21*( H12 / SCL ).LE.
535 $ MAX( SMLNUM, ULP*TST2 ) )H( K+1, K ) = ZERO
536 END IF
537 END IF
538 130 CONTINUE
539 *
540 * ==== Fill in the last row of each bulge. ====
541 *
542 MEND = MIN( NBMPS, ( KBOT-KRCOL-1 ) / 3 )
543 DO 140 M = MTOP, MEND
544 K = KRCOL + 3*( M-1 )
545 REFSUM = V( 1, M )*V( 3, M )*H( K+4, K+3 )
546 H( K+4, K+1 ) = -REFSUM
547 H( K+4, K+2 ) = -REFSUM*V( 2, M )
548 H( K+4, K+3 ) = H( K+4, K+3 ) - REFSUM*V( 3, M )
549 140 CONTINUE
550 *
551 * ==== End of near-the-diagonal bulge chase. ====
552 *
553 150 CONTINUE
554 *
555 * ==== Use U (if accumulated) to update far-from-diagonal
556 * . entries in H. If required, use U to update Z as
557 * . well. ====
558 *
559 IF( ACCUM ) THEN
560 IF( WANTT ) THEN
561 JTOP = 1
562 JBOT = N
563 ELSE
564 JTOP = KTOP
565 JBOT = KBOT
566 END IF
567 IF( ( .NOT.BLK22 ) .OR. ( INCOL.LT.KTOP ) .OR.
568 $ ( NDCOL.GT.KBOT ) .OR. ( NS.LE.2 ) ) THEN
569 *
570 * ==== Updates not exploiting the 2-by-2 block
571 * . structure of U. K1 and NU keep track of
572 * . the location and size of U in the special
573 * . cases of introducing bulges and chasing
574 * . bulges off the bottom. In these special
575 * . cases and in case the number of shifts
576 * . is NS = 2, there is no 2-by-2 block
577 * . structure to exploit. ====
578 *
579 K1 = MAX( 1, KTOP-INCOL )
580 NU = ( KDU-MAX( 0, NDCOL-KBOT ) ) - K1 + 1
581 *
582 * ==== Horizontal Multiply ====
583 *
584 DO 160 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
585 JLEN = MIN( NH, JBOT-JCOL+1 )
586 CALL DGEMM( 'C', 'N', NU, JLEN, NU, ONE, U( K1, K1 ),
587 $ LDU, H( INCOL+K1, JCOL ), LDH, ZERO, WH,
588 $ LDWH )
589 CALL DLACPY( 'ALL', NU, JLEN, WH, LDWH,
590 $ H( INCOL+K1, JCOL ), LDH )
591 160 CONTINUE
592 *
593 * ==== Vertical multiply ====
594 *
595 DO 170 JROW = JTOP, MAX( KTOP, INCOL ) - 1, NV
596 JLEN = MIN( NV, MAX( KTOP, INCOL )-JROW )
597 CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
598 $ H( JROW, INCOL+K1 ), LDH, U( K1, K1 ),
599 $ LDU, ZERO, WV, LDWV )
600 CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
601 $ H( JROW, INCOL+K1 ), LDH )
602 170 CONTINUE
603 *
604 * ==== Z multiply (also vertical) ====
605 *
606 IF( WANTZ ) THEN
607 DO 180 JROW = ILOZ, IHIZ, NV
608 JLEN = MIN( NV, IHIZ-JROW+1 )
609 CALL DGEMM( 'N', 'N', JLEN, NU, NU, ONE,
610 $ Z( JROW, INCOL+K1 ), LDZ, U( K1, K1 ),
611 $ LDU, ZERO, WV, LDWV )
612 CALL DLACPY( 'ALL', JLEN, NU, WV, LDWV,
613 $ Z( JROW, INCOL+K1 ), LDZ )
614 180 CONTINUE
615 END IF
616 ELSE
617 *
618 * ==== Updates exploiting U's 2-by-2 block structure.
619 * . (I2, I4, J2, J4 are the last rows and columns
620 * . of the blocks.) ====
621 *
622 I2 = ( KDU+1 ) / 2
623 I4 = KDU
624 J2 = I4 - I2
625 J4 = KDU
626 *
627 * ==== KZS and KNZ deal with the band of zeros
628 * . along the diagonal of one of the triangular
629 * . blocks. ====
630 *
631 KZS = ( J4-J2 ) - ( NS+1 )
632 KNZ = NS + 1
633 *
634 * ==== Horizontal multiply ====
635 *
636 DO 190 JCOL = MIN( NDCOL, KBOT ) + 1, JBOT, NH
637 JLEN = MIN( NH, JBOT-JCOL+1 )
638 *
639 * ==== Copy bottom of H to top+KZS of scratch ====
640 * (The first KZS rows get multiplied by zero.) ====
641 *
642 CALL DLACPY( 'ALL', KNZ, JLEN, H( INCOL+1+J2, JCOL ),
643 $ LDH, WH( KZS+1, 1 ), LDWH )
644 *
645 * ==== Multiply by U21**T ====
646 *
647 CALL DLASET( 'ALL', KZS, JLEN, ZERO, ZERO, WH, LDWH )
648 CALL DTRMM( 'L', 'U', 'C', 'N', KNZ, JLEN, ONE,
649 $ U( J2+1, 1+KZS ), LDU, WH( KZS+1, 1 ),
650 $ LDWH )
651 *
652 * ==== Multiply top of H by U11**T ====
653 *
654 CALL DGEMM( 'C', 'N', I2, JLEN, J2, ONE, U, LDU,
655 $ H( INCOL+1, JCOL ), LDH, ONE, WH, LDWH )
656 *
657 * ==== Copy top of H to bottom of WH ====
658 *
659 CALL DLACPY( 'ALL', J2, JLEN, H( INCOL+1, JCOL ), LDH,
660 $ WH( I2+1, 1 ), LDWH )
661 *
662 * ==== Multiply by U21**T ====
663 *
664 CALL DTRMM( 'L', 'L', 'C', 'N', J2, JLEN, ONE,
665 $ U( 1, I2+1 ), LDU, WH( I2+1, 1 ), LDWH )
666 *
667 * ==== Multiply by U22 ====
668 *
669 CALL DGEMM( 'C', 'N', I4-I2, JLEN, J4-J2, ONE,
670 $ U( J2+1, I2+1 ), LDU,
671 $ H( INCOL+1+J2, JCOL ), LDH, ONE,
672 $ WH( I2+1, 1 ), LDWH )
673 *
674 * ==== Copy it back ====
675 *
676 CALL DLACPY( 'ALL', KDU, JLEN, WH, LDWH,
677 $ H( INCOL+1, JCOL ), LDH )
678 190 CONTINUE
679 *
680 * ==== Vertical multiply ====
681 *
682 DO 200 JROW = JTOP, MAX( INCOL, KTOP ) - 1, NV
683 JLEN = MIN( NV, MAX( INCOL, KTOP )-JROW )
684 *
685 * ==== Copy right of H to scratch (the first KZS
686 * . columns get multiplied by zero) ====
687 *
688 CALL DLACPY( 'ALL', JLEN, KNZ, H( JROW, INCOL+1+J2 ),
689 $ LDH, WV( 1, 1+KZS ), LDWV )
690 *
691 * ==== Multiply by U21 ====
692 *
693 CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV, LDWV )
694 CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
695 $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
696 $ LDWV )
697 *
698 * ==== Multiply by U11 ====
699 *
700 CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
701 $ H( JROW, INCOL+1 ), LDH, U, LDU, ONE, WV,
702 $ LDWV )
703 *
704 * ==== Copy left of H to right of scratch ====
705 *
706 CALL DLACPY( 'ALL', JLEN, J2, H( JROW, INCOL+1 ), LDH,
707 $ WV( 1, 1+I2 ), LDWV )
708 *
709 * ==== Multiply by U21 ====
710 *
711 CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
712 $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ), LDWV )
713 *
714 * ==== Multiply by U22 ====
715 *
716 CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
717 $ H( JROW, INCOL+1+J2 ), LDH,
718 $ U( J2+1, I2+1 ), LDU, ONE, WV( 1, 1+I2 ),
719 $ LDWV )
720 *
721 * ==== Copy it back ====
722 *
723 CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
724 $ H( JROW, INCOL+1 ), LDH )
725 200 CONTINUE
726 *
727 * ==== Multiply Z (also vertical) ====
728 *
729 IF( WANTZ ) THEN
730 DO 210 JROW = ILOZ, IHIZ, NV
731 JLEN = MIN( NV, IHIZ-JROW+1 )
732 *
733 * ==== Copy right of Z to left of scratch (first
734 * . KZS columns get multiplied by zero) ====
735 *
736 CALL DLACPY( 'ALL', JLEN, KNZ,
737 $ Z( JROW, INCOL+1+J2 ), LDZ,
738 $ WV( 1, 1+KZS ), LDWV )
739 *
740 * ==== Multiply by U12 ====
741 *
742 CALL DLASET( 'ALL', JLEN, KZS, ZERO, ZERO, WV,
743 $ LDWV )
744 CALL DTRMM( 'R', 'U', 'N', 'N', JLEN, KNZ, ONE,
745 $ U( J2+1, 1+KZS ), LDU, WV( 1, 1+KZS ),
746 $ LDWV )
747 *
748 * ==== Multiply by U11 ====
749 *
750 CALL DGEMM( 'N', 'N', JLEN, I2, J2, ONE,
751 $ Z( JROW, INCOL+1 ), LDZ, U, LDU, ONE,
752 $ WV, LDWV )
753 *
754 * ==== Copy left of Z to right of scratch ====
755 *
756 CALL DLACPY( 'ALL', JLEN, J2, Z( JROW, INCOL+1 ),
757 $ LDZ, WV( 1, 1+I2 ), LDWV )
758 *
759 * ==== Multiply by U21 ====
760 *
761 CALL DTRMM( 'R', 'L', 'N', 'N', JLEN, I4-I2, ONE,
762 $ U( 1, I2+1 ), LDU, WV( 1, 1+I2 ),
763 $ LDWV )
764 *
765 * ==== Multiply by U22 ====
766 *
767 CALL DGEMM( 'N', 'N', JLEN, I4-I2, J4-J2, ONE,
768 $ Z( JROW, INCOL+1+J2 ), LDZ,
769 $ U( J2+1, I2+1 ), LDU, ONE,
770 $ WV( 1, 1+I2 ), LDWV )
771 *
772 * ==== Copy the result back to Z ====
773 *
774 CALL DLACPY( 'ALL', JLEN, KDU, WV, LDWV,
775 $ Z( JROW, INCOL+1 ), LDZ )
776 210 CONTINUE
777 END IF
778 END IF
779 END IF
780 220 CONTINUE
781 *
782 * ==== End of DLAQR5 ====
783 *
784 END