*  ZBBCSD: CS decomposition of a unitary matrix given in
*  bidiagonal-block form.  See "Purpose" below for details.
      SUBROUTINE ZBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q,
     $                   THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T,
     $                   V2T, LDV2T, B11D, B11E, B12D, B12E, B21D,
     $                   B21E, B22D, B22E, RWORK, LRWORK, INFO )
      IMPLICIT NONE
*
*  -- LAPACK routine (version 3.3.0) --
*
*  -- Contributed by Brian Sutton of the Randolph-Macon College --
*  -- November 2010
*
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG  --
*  -- Ltd..                                                           --
*
*     .. Scalar Arguments ..
      CHARACTER          JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS
      INTEGER            INFO, LDU1, LDU2, LDV1T, LDV2T, LRWORK, M, P, Q
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION   B11D( * ), B11E( * ), B12D( * ), B12E( * ),
     $                   B21D( * ), B21E( * ), B22D( * ), B22E( * ),
     $                   PHI( * ), THETA( * ), RWORK( * )
      COMPLEX*16         U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),
     $                   V2T( LDV2T, * )
*     ..
*
*  Purpose
*  =======
*
*  ZBBCSD computes the CS decomposition of a unitary matrix in
*  bidiagonal-block form,
*
*
*      [ B11 | B12 0  0 ]
*      [  0  |  0 -I  0 ]
*  X = [----------------]
*      [ B21 | B22 0  0 ]
*      [  0  |  0  0  I ]
*
*                                [  C | -S  0  0 ]
*                    [ U1 |    ] [  0 |  0 -I  0 ] [ V1 |    ]**H
*                  = [---------] [---------------] [---------]   .
*                    [    | U2 ] [  S |  C  0  0 ] [    | V2 ]
*                                [  0 |  0  0  I ]
*
*  X is M-by-M, its top-left block is P-by-Q, and Q must be no larger
*  than P, M-P, or M-Q. (If Q is not the smallest index, then X must be
*  transposed and/or permuted. This can be done in constant time using
*  the TRANS and SIGNS options. See ZUNCSD for details.)
*
*  The bidiagonal matrices B11, B12, B21, and B22 are represented
*  implicitly by angles THETA(1:Q) and PHI(1:Q-1).
*
*  The unitary matrices U1, U2, V1T, and V2T are input/output.
*  The input matrices are pre- or post-multiplied by the appropriate
*  singular vector matrices.
*
*  Arguments
*  =========
*
*  JOBU1   (input) CHARACTER
*          = 'Y':      U1 is updated;
*          otherwise:  U1 is not updated.
*
*  JOBU2   (input) CHARACTER
*          = 'Y':      U2 is updated;
*          otherwise:  U2 is not updated.
*
*  JOBV1T  (input) CHARACTER
*          = 'Y':      V1T is updated;
*          otherwise:  V1T is not updated.
*
*  JOBV2T  (input) CHARACTER
*          = 'Y':      V2T is updated;
*          otherwise:  V2T is not updated.
*
*  TRANS   (input) CHARACTER
*          = 'T':      X, U1, U2, V1T, and V2T are stored in row-major
*                      order;
*          otherwise:  X, U1, U2, V1T, and V2T are stored in column-
*                      major order.
*
*  M       (input) INTEGER
*          The number of rows and columns in X, the unitary matrix in
*          bidiagonal-block form.
*
*  P       (input) INTEGER
*          The number of rows in the top-left block of X. 0 <= P <= M.
*
*  Q       (input) INTEGER
*          The number of columns in the top-left block of X.
*          0 <= Q <= MIN(P,M-P,M-Q).
*
*  THETA   (input/output) DOUBLE PRECISION array, dimension (Q)
*          On entry, the angles THETA(1),...,THETA(Q) that, along with
*          PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block
*          form. On exit, the angles whose cosines and sines define the
*          diagonal blocks in the CS decomposition.
*
*  PHI     (input/workspace) DOUBLE PRECISION array, dimension (Q-1)
*          The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),...,
*          THETA(Q), define the matrix in bidiagonal-block form.
*
*  U1      (input/output) COMPLEX*16 array, dimension (LDU1,P)
*          On entry, an LDU1-by-P matrix. On exit, U1 is postmultiplied
*          by the left singular vector matrix common to [ B11 ; 0 ] and
*          [ B12 0 0 ; 0 -I 0 0 ].
*
*  LDU1    (input) INTEGER
*          The leading dimension of the array U1.
*
*  U2      (input/output) COMPLEX*16 array, dimension (LDU2,M-P)
*          On entry, an LDU2-by-(M-P) matrix. On exit, U2 is
*          postmultiplied by the left singular vector matrix common to
*          [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ].
*
*  LDU2    (input) INTEGER
*          The leading dimension of the array U2.
*
*  V1T     (input/output) COMPLEX*16 array, dimension (LDV1T,Q)
*          On entry, a LDV1T-by-Q matrix. On exit, V1T is premultiplied
*          by the conjugate transpose of the right singular vector
*          matrix common to [ B11 ; 0 ] and [ B21 ; 0 ].
*
*  LDV1T   (input) INTEGER
*          The leading dimension of the array V1T.
*
*  V2T     (input/output) COMPLEX*16 array, dimension (LDV2T,M-Q)
*          On entry, a LDV2T-by-(M-Q) matrix. On exit, V2T is
*          premultiplied by the conjugate transpose of the right
*          singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and
*          [ B22 0 0 ; 0 0 I ].
*
*  LDV2T   (input) INTEGER
*          The leading dimension of the array V2T.
*
*  B11D    (output) DOUBLE PRECISION array, dimension (Q)
*          When ZBBCSD converges, B11D contains the cosines of THETA(1),
*          ..., THETA(Q). If ZBBCSD fails to converge, then B11D
*          contains the diagonal of the partially reduced top-left
*          block.
*
*  B11E    (output) DOUBLE PRECISION array, dimension (Q-1)
*          When ZBBCSD converges, B11E contains zeros. If ZBBCSD fails
*          to converge, then B11E contains the superdiagonal of the
*          partially reduced top-left block.
*
*  B12D    (output) DOUBLE PRECISION array, dimension (Q)
*          When ZBBCSD converges, B12D contains the negative sines of
*          THETA(1), ..., THETA(Q). If ZBBCSD fails to converge, then
*          B12D contains the diagonal of the partially reduced top-right
*          block.
*
*  B12E    (output) DOUBLE PRECISION array, dimension (Q-1)
*          When ZBBCSD converges, B12E contains zeros. If ZBBCSD fails
*          to converge, then B12E contains the subdiagonal of the
*          partially reduced top-right block.
*
*  B21D    (output) DOUBLE PRECISION array, dimension (Q)
*          The diagonal of the bottom-left block B21 as maintained by
*          the iteration. Each sweep rebuilds the active entries from
*          the angles as -SIN(THETA(i))*COS(PHI(i-1)) and then updates
*          them with the plane rotations.
*
*  B21E    (output) DOUBLE PRECISION array, dimension (Q-1)
*          The off-diagonal of B21, stored like B11E. Rebuilt each
*          sweep as -COS(THETA(i))*SIN(PHI(i)) and then rotated.
*
*  B22D    (output) DOUBLE PRECISION array, dimension (Q)
*          The diagonal of the bottom-right block B22. Rebuilt each
*          sweep as COS(THETA(i))*COS(PHI(i)) and then rotated.
*
*  B22E    (output) DOUBLE PRECISION array, dimension (Q-1)
*          The off-diagonal of B22, stored like B12E. Rebuilt each
*          sweep as -SIN(THETA(i+1))*SIN(PHI(i)) and then rotated.
*
*  RWORK   (workspace) DOUBLE PRECISION array, dimension (MAX(1,LRWORK))
*          On a workspace query (LRWORK = -1) or when Q = 0, RWORK(1)
*          returns the optimal LRWORK. In a normal call RWORK holds
*          the cosines and sines of the plane rotations, so RWORK(1)
*          may be overwritten.
*
*  LRWORK  (input) INTEGER
*          The dimension of the array RWORK. LRWORK >= MAX(1,8*Q).
*
*          If LRWORK = -1, then a workspace query is assumed; the
*          routine only calculates the optimal size of the RWORK array,
*          returns this value as the first entry of the work array, and
*          no error message related to LRWORK is issued by XERBLA.
*
*  INFO    (output) INTEGER
*          = 0:  successful exit.
*          < 0:  if INFO = -i, the i-th argument had an illegal value.
*          > 0:  if ZBBCSD did not converge, INFO specifies the number
*                of nonzero entries in PHI, and B11D, B11E, etc.,
*                contain the partially reduced matrix.
*
*  Reference
*  =========
*
*  [1] Brian D. Sutton. Computing the complete CS decomposition. Numer.
*      Algorithms, 50(1):33-65, 2009.
*
*  Internal Parameters
*  ===================
*
*  TOLMUL  DOUBLE PRECISION, default = MAX(10,MIN(100,EPS**(-1/8)))
*          TOLMUL controls the convergence criterion of the QR loop.
*          Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they
*          are within TOLMUL*EPS of either bound.
*
*  ===================================================================
*
*     .. Parameters ..
      INTEGER            MAXITR
      PARAMETER          ( MAXITR = 6 )
      DOUBLE PRECISION   HUNDRED, MEIGHTH, ONE, PIOVER2, TEN, ZERO
      PARAMETER          ( HUNDRED = 100.0D0, MEIGHTH = -0.125D0,
     $                     ONE = 1.0D0, PIOVER2 = 1.57079632679489662D0,
     $                     TEN = 10.0D0, ZERO = 0.0D0 )
      COMPLEX*16         NEGONECOMPLEX
      PARAMETER          ( NEGONECOMPLEX = (-1.0D0,0.0D0) )
*     ..
*     .. Local Scalars ..
      LOGICAL            COLMAJOR, LQUERY, RESTART11, RESTART12,
     $                   RESTART21, RESTART22, WANTU1, WANTU2, WANTV1T,
     $                   WANTV2T
      INTEGER            I, IMIN, IMAX, ITER, IU1CS, IU1SN, IU2CS,
     $                   IU2SN, IV1TCS, IV1TSN, IV2TCS, IV2TSN, J,
     $                   LRWORKMIN, LRWORKOPT, MAXIT, MINI
      DOUBLE PRECISION   B11BULGE, B12BULGE, B21BULGE, B22BULGE,
     $                   DUMMY, EPS, MU, NU, R, SIGMA11, SIGMA21,
     $                   TEMP, THETAMAX, THETAMIN, THRESH, TOL, TOLMUL,
     $                   UNFL, X1, X2, Y1, Y2
*
      EXTERNAL           DLARTGP, DLARTGS, DLAS2, XERBLA, ZLASR, ZSCAL,
     $                   ZSWAP
*     ..
*     .. External Functions ..
      DOUBLE PRECISION   DLAMCH
      LOGICAL            LSAME
      EXTERNAL           LSAME, DLAMCH
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          ABS, ATAN2, COS, MAX, MIN, SIN, SQRT
*     ..
*     .. Executable Statements ..
*
*     Test input arguments
*
      INFO = 0
      LQUERY = LRWORK .EQ. -1
      WANTU1 = LSAME( JOBU1, 'Y' )
      WANTU2 = LSAME( JOBU2, 'Y' )
      WANTV1T = LSAME( JOBV1T, 'Y' )
      WANTV2T = LSAME( JOBV2T, 'Y' )
      COLMAJOR = .NOT. LSAME( TRANS, 'T' )
*
      IF( M .LT. 0 ) THEN
         INFO = -6
      ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN
         INFO = -7
      ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN
         INFO = -8
      ELSE IF( Q .GT. P .OR. Q .GT. M-P .OR. Q .GT. M-Q ) THEN
         INFO = -8
      ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN
         INFO = -12
      ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN
         INFO = -14
      ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN
         INFO = -16
      ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN
         INFO = -18
      END IF
*
*     Quick return if Q = 0
*
      IF( INFO .EQ. 0 .AND. Q .EQ. 0 ) THEN
         LRWORKMIN = 1
         RWORK(1) = LRWORKMIN
         RETURN
      END IF
*
*     Compute workspace: eight consecutive length-Q segments of RWORK
*     hold the cosines and sines of the rotations applied to U1, U2,
*     V1T and V2T.
*
      IF( INFO .EQ. 0 ) THEN
         IU1CS = 1
         IU1SN = IU1CS + Q
         IU2CS = IU1SN + Q
         IU2SN = IU2CS + Q
         IV1TCS = IU2SN + Q
         IV1TSN = IV1TCS + Q
         IV2TCS = IV1TSN + Q
         IV2TSN = IV2TCS + Q
         LRWORKOPT = IV2TSN + Q - 1
         LRWORKMIN = LRWORKOPT
         RWORK(1) = LRWORKOPT
         IF( LRWORK .LT. LRWORKMIN .AND. .NOT. LQUERY ) THEN
            INFO = -28
         END IF
      END IF
*
      IF( INFO .NE. 0 ) THEN
         CALL XERBLA( 'ZBBCSD', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
*
*     Get machine constants
*
      EPS = DLAMCH( 'Epsilon' )
      UNFL = DLAMCH( 'Safe minimum' )
      TOLMUL = MAX( TEN, MIN( HUNDRED, EPS**MEIGHTH ) )
      TOL = TOLMUL*EPS
      THRESH = MAX( TOL, MAXITR*Q*Q*UNFL )
*
*     Test for negligible sines or cosines
*
      DO I = 1, Q
         IF( THETA(I) .LT. THRESH ) THEN
            THETA(I) = ZERO
         ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN
            THETA(I) = PIOVER2
         END IF
      END DO
      DO I = 1, Q-1
         IF( PHI(I) .LT. THRESH ) THEN
            PHI(I) = ZERO
         ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN
            PHI(I) = PIOVER2
         END IF
      END DO
*
*     Initial deflation.  The IMAX > 1 guard is tested separately from
*     the PHI(IMAX-1) read because .AND. is not guaranteed to
*     short-circuit, so a combined test could evaluate PHI(0).
*
      IMAX = Q
      IF( IMAX .GT. 1 ) THEN
         DO WHILE( PHI(IMAX-1) .EQ. ZERO )
            IMAX = IMAX - 1
            IF( IMAX .LE. 1 ) EXIT
         END DO
      END IF
      IMIN = IMAX - 1
      IF( IMIN .GT. 1 ) THEN
         DO WHILE( PHI(IMIN-1) .NE. ZERO )
            IMIN = IMIN - 1
            IF( IMIN .LE. 1 ) EXIT
         END DO
      END IF
*
*     Initialize iteration counter
*
      MAXIT = MAXITR*Q*Q
      ITER = 0
*
*     Begin main iteration loop
*
      DO WHILE( IMAX .GT. 1 )
*
*        Compute the matrix entries
*
         B11D(IMIN) = COS( THETA(IMIN) )
         B21D(IMIN) = -SIN( THETA(IMIN) )
         DO I = IMIN, IMAX - 1
            B11E(I) = -SIN( THETA(I) ) * SIN( PHI(I) )
            B11D(I+1) = COS( THETA(I+1) ) * COS( PHI(I) )
            B12D(I) = SIN( THETA(I) ) * COS( PHI(I) )
            B12E(I) = COS( THETA(I+1) ) * SIN( PHI(I) )
            B21E(I) = -COS( THETA(I) ) * SIN( PHI(I) )
            B21D(I+1) = -SIN( THETA(I+1) ) * COS( PHI(I) )
            B22D(I) = COS( THETA(I) ) * COS( PHI(I) )
            B22E(I) = -SIN( THETA(I+1) ) * SIN( PHI(I) )
         END DO
         B12D(IMAX) = SIN( THETA(IMAX) )
         B22D(IMAX) = COS( THETA(IMAX) )
*
*        Abort if not converging; otherwise, increment ITER.
*        PHI has only Q-1 entries, so the count stops at Q-1.
*
         IF( ITER .GT. MAXIT ) THEN
            INFO = 0
            DO I = 1, Q - 1
               IF( PHI(I) .NE. ZERO )
     $            INFO = INFO + 1
            END DO
            RETURN
         END IF
*
         ITER = ITER + IMAX - IMIN
*
*        Compute shifts
*
         THETAMAX = THETA(IMIN)
         THETAMIN = THETA(IMIN)
         DO I = IMIN+1, IMAX
            IF( THETA(I) .GT. THETAMAX )
     $         THETAMAX = THETA(I)
            IF( THETA(I) .LT. THETAMIN )
     $         THETAMIN = THETA(I)
         END DO
*
         IF( THETAMAX .GT. PIOVER2 - THRESH ) THEN
*
*           Zero on diagonals of B11 and B22; induce deflation with a
*           zero shift
*
            MU = ZERO
            NU = ONE
*
         ELSE IF( THETAMIN .LT. THRESH ) THEN
*
*           Zero on diagonals of B12 and B22; induce deflation with a
*           zero shift
*
            MU = ONE
            NU = ZERO
*
         ELSE
*
*           Compute shifts for B11 and B21 and use the lesser
*
            CALL DLAS2( B11D(IMAX-1), B11E(IMAX-1), B11D(IMAX),
     $                  SIGMA11, DUMMY )
            CALL DLAS2( B21D(IMAX-1), B21E(IMAX-1), B21D(IMAX),
     $                  SIGMA21, DUMMY )
*
            IF( SIGMA11 .LE. SIGMA21 ) THEN
               MU = SIGMA11
               NU = SQRT( ONE - MU**2 )
               IF( MU .LT. THRESH ) THEN
                  MU = ZERO
                  NU = ONE
               END IF
            ELSE
               NU = SIGMA21
               MU = SQRT( ONE - NU**2 )
               IF( NU .LT. THRESH ) THEN
                  MU = ONE
                  NU = ZERO
               END IF
            END IF
         END IF
*
*        Rotate to produce bulges in B11 and B21
*
         IF( MU .LE. NU ) THEN
            CALL DLARTGS( B11D(IMIN), B11E(IMIN), MU,
     $                    RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1) )
         ELSE
            CALL DLARTGS( B21D(IMIN), B21E(IMIN), NU,
     $                    RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1) )
         END IF
*
         TEMP = RWORK(IV1TCS+IMIN-1)*B11D(IMIN) +
     $          RWORK(IV1TSN+IMIN-1)*B11E(IMIN)
         B11E(IMIN) = RWORK(IV1TCS+IMIN-1)*B11E(IMIN) -
     $                RWORK(IV1TSN+IMIN-1)*B11D(IMIN)
         B11D(IMIN) = TEMP
         B11BULGE = RWORK(IV1TSN+IMIN-1)*B11D(IMIN+1)
         B11D(IMIN+1) = RWORK(IV1TCS+IMIN-1)*B11D(IMIN+1)
         TEMP = RWORK(IV1TCS+IMIN-1)*B21D(IMIN) +
     $          RWORK(IV1TSN+IMIN-1)*B21E(IMIN)
         B21E(IMIN) = RWORK(IV1TCS+IMIN-1)*B21E(IMIN) -
     $                RWORK(IV1TSN+IMIN-1)*B21D(IMIN)
         B21D(IMIN) = TEMP
         B21BULGE = RWORK(IV1TSN+IMIN-1)*B21D(IMIN+1)
         B21D(IMIN+1) = RWORK(IV1TCS+IMIN-1)*B21D(IMIN+1)
*
*        Compute THETA(IMIN)
*
         THETA( IMIN ) = ATAN2( SQRT( B21D(IMIN)**2+B21BULGE**2 ),
     $                   SQRT( B11D(IMIN)**2+B11BULGE**2 ) )
*
*        Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN)
*
         IF( B11D(IMIN)**2+B11BULGE**2 .GT. THRESH**2 ) THEN
            CALL DLARTGP( B11BULGE, B11D(IMIN), RWORK(IU1SN+IMIN-1),
     $                    RWORK(IU1CS+IMIN-1), R )
         ELSE IF( MU .LE. NU ) THEN
            CALL DLARTGS( B11E( IMIN ), B11D( IMIN + 1 ), MU,
     $                    RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1) )
         ELSE
            CALL DLARTGS( B12D( IMIN ), B12E( IMIN ), NU,
     $                    RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1) )
         END IF
         IF( B21D(IMIN)**2+B21BULGE**2 .GT. THRESH**2 ) THEN
            CALL DLARTGP( B21BULGE, B21D(IMIN), RWORK(IU2SN+IMIN-1),
     $                    RWORK(IU2CS+IMIN-1), R )
         ELSE IF( NU .LT. MU ) THEN
            CALL DLARTGS( B21E( IMIN ), B21D( IMIN + 1 ), NU,
     $                    RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1) )
         ELSE
            CALL DLARTGS( B22D(IMIN), B22E(IMIN), MU,
     $                    RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1) )
         END IF
         RWORK(IU2CS+IMIN-1) = -RWORK(IU2CS+IMIN-1)
         RWORK(IU2SN+IMIN-1) = -RWORK(IU2SN+IMIN-1)
*
         TEMP = RWORK(IU1CS+IMIN-1)*B11E(IMIN) +
     $          RWORK(IU1SN+IMIN-1)*B11D(IMIN+1)
         B11D(IMIN+1) = RWORK(IU1CS+IMIN-1)*B11D(IMIN+1) -
     $                  RWORK(IU1SN+IMIN-1)*B11E(IMIN)
         B11E(IMIN) = TEMP
         IF( IMAX .GT. IMIN+1 ) THEN
            B11BULGE = RWORK(IU1SN+IMIN-1)*B11E(IMIN+1)
            B11E(IMIN+1) = RWORK(IU1CS+IMIN-1)*B11E(IMIN+1)
         END IF
         TEMP = RWORK(IU1CS+IMIN-1)*B12D(IMIN) +
     $          RWORK(IU1SN+IMIN-1)*B12E(IMIN)
         B12E(IMIN) = RWORK(IU1CS+IMIN-1)*B12E(IMIN) -
     $                RWORK(IU1SN+IMIN-1)*B12D(IMIN)
         B12D(IMIN) = TEMP
         B12BULGE = RWORK(IU1SN+IMIN-1)*B12D(IMIN+1)
         B12D(IMIN+1) = RWORK(IU1CS+IMIN-1)*B12D(IMIN+1)
         TEMP = RWORK(IU2CS+IMIN-1)*B21E(IMIN) +
     $          RWORK(IU2SN+IMIN-1)*B21D(IMIN+1)
         B21D(IMIN+1) = RWORK(IU2CS+IMIN-1)*B21D(IMIN+1) -
     $                  RWORK(IU2SN+IMIN-1)*B21E(IMIN)
         B21E(IMIN) = TEMP
         IF( IMAX .GT. IMIN+1 ) THEN
            B21BULGE = RWORK(IU2SN+IMIN-1)*B21E(IMIN+1)
            B21E(IMIN+1) = RWORK(IU2CS+IMIN-1)*B21E(IMIN+1)
         END IF
         TEMP = RWORK(IU2CS+IMIN-1)*B22D(IMIN) +
     $          RWORK(IU2SN+IMIN-1)*B22E(IMIN)
         B22E(IMIN) = RWORK(IU2CS+IMIN-1)*B22E(IMIN) -
     $                RWORK(IU2SN+IMIN-1)*B22D(IMIN)
         B22D(IMIN) = TEMP
         B22BULGE = RWORK(IU2SN+IMIN-1)*B22D(IMIN+1)
         B22D(IMIN+1) = RWORK(IU2CS+IMIN-1)*B22D(IMIN+1)
*
*        Inner loop: chase bulges from B11(IMIN,IMIN+2),
*        B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to
*        bottom-right
*
         DO I = IMIN+1, IMAX-1
*
*           Compute PHI(I-1)
*
            X1 = SIN(THETA(I-1))*B11E(I-1) + COS(THETA(I-1))*B21E(I-1)
            X2 = SIN(THETA(I-1))*B11BULGE + COS(THETA(I-1))*B21BULGE
            Y1 = SIN(THETA(I-1))*B12D(I-1) + COS(THETA(I-1))*B22D(I-1)
            Y2 = SIN(THETA(I-1))*B12BULGE + COS(THETA(I-1))*B22BULGE
*
            PHI(I-1) = ATAN2( SQRT(X1**2+X2**2), SQRT(Y1**2+Y2**2) )
*
*           Determine if there are bulges to chase or if a new direct
*           summand has been reached
*
            RESTART11 = B11E(I-1)**2 + B11BULGE**2 .LE. THRESH**2
            RESTART21 = B21E(I-1)**2 + B21BULGE**2 .LE. THRESH**2
            RESTART12 = B12D(I-1)**2 + B12BULGE**2 .LE. THRESH**2
            RESTART22 = B22D(I-1)**2 + B22BULGE**2 .LE. THRESH**2
*
*           If possible, chase bulges from B11(I-1,I+1), B12(I-1,I),
*           B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge-
*           chasing by applying the original shift again.
*
            IF( .NOT. RESTART11 .AND. .NOT. RESTART21 ) THEN
               CALL DLARTGP( X2, X1, RWORK(IV1TSN+I-1),
     $                       RWORK(IV1TCS+I-1), R )
            ELSE IF( .NOT. RESTART11 .AND. RESTART21 ) THEN
               CALL DLARTGP( B11BULGE, B11E(I-1), RWORK(IV1TSN+I-1),
     $                       RWORK(IV1TCS+I-1), R )
            ELSE IF( RESTART11 .AND. .NOT. RESTART21 ) THEN
               CALL DLARTGP( B21BULGE, B21E(I-1), RWORK(IV1TSN+I-1),
     $                       RWORK(IV1TCS+I-1), R )
            ELSE IF( MU .LE. NU ) THEN
               CALL DLARTGS( B11D(I), B11E(I), MU, RWORK(IV1TCS+I-1),
     $                       RWORK(IV1TSN+I-1) )
            ELSE
               CALL DLARTGS( B21D(I), B21E(I), NU, RWORK(IV1TCS+I-1),
     $                       RWORK(IV1TSN+I-1) )
            END IF
            RWORK(IV1TCS+I-1) = -RWORK(IV1TCS+I-1)
            RWORK(IV1TSN+I-1) = -RWORK(IV1TSN+I-1)
            IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN
               CALL DLARTGP( Y2, Y1, RWORK(IV2TSN+I-1-1),
     $                       RWORK(IV2TCS+I-1-1), R )
            ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN
               CALL DLARTGP( B12BULGE, B12D(I-1), RWORK(IV2TSN+I-1-1),
     $                       RWORK(IV2TCS+I-1-1), R )
            ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN
               CALL DLARTGP( B22BULGE, B22D(I-1), RWORK(IV2TSN+I-1-1),
     $                       RWORK(IV2TCS+I-1-1), R )
            ELSE IF( NU .LT. MU ) THEN
               CALL DLARTGS( B12E(I-1), B12D(I), NU,
     $                       RWORK(IV2TCS+I-1-1), RWORK(IV2TSN+I-1-1) )
            ELSE
               CALL DLARTGS( B22E(I-1), B22D(I), MU,
     $                       RWORK(IV2TCS+I-1-1), RWORK(IV2TSN+I-1-1) )
            END IF
*
            TEMP = RWORK(IV1TCS+I-1)*B11D(I) +
     $             RWORK(IV1TSN+I-1)*B11E(I)
            B11E(I) = RWORK(IV1TCS+I-1)*B11E(I) -
     $                RWORK(IV1TSN+I-1)*B11D(I)
            B11D(I) = TEMP
            B11BULGE = RWORK(IV1TSN+I-1)*B11D(I+1)
            B11D(I+1) = RWORK(IV1TCS+I-1)*B11D(I+1)
            TEMP = RWORK(IV1TCS+I-1)*B21D(I) +
     $             RWORK(IV1TSN+I-1)*B21E(I)
            B21E(I) = RWORK(IV1TCS+I-1)*B21E(I) -
     $                RWORK(IV1TSN+I-1)*B21D(I)
            B21D(I) = TEMP
            B21BULGE = RWORK(IV1TSN+I-1)*B21D(I+1)
            B21D(I+1) = RWORK(IV1TCS+I-1)*B21D(I+1)
            TEMP = RWORK(IV2TCS+I-1-1)*B12E(I-1) +
     $             RWORK(IV2TSN+I-1-1)*B12D(I)
            B12D(I) = RWORK(IV2TCS+I-1-1)*B12D(I) -
     $                RWORK(IV2TSN+I-1-1)*B12E(I-1)
            B12E(I-1) = TEMP
            B12BULGE = RWORK(IV2TSN+I-1-1)*B12E(I)
            B12E(I) = RWORK(IV2TCS+I-1-1)*B12E(I)
            TEMP = RWORK(IV2TCS+I-1-1)*B22E(I-1) +
     $             RWORK(IV2TSN+I-1-1)*B22D(I)
            B22D(I) = RWORK(IV2TCS+I-1-1)*B22D(I) -
     $                RWORK(IV2TSN+I-1-1)*B22E(I-1)
            B22E(I-1) = TEMP
            B22BULGE = RWORK(IV2TSN+I-1-1)*B22E(I)
            B22E(I) = RWORK(IV2TCS+I-1-1)*B22E(I)
*
*           Compute THETA(I)
*
            X1 = COS(PHI(I-1))*B11D(I) + SIN(PHI(I-1))*B12E(I-1)
            X2 = COS(PHI(I-1))*B11BULGE + SIN(PHI(I-1))*B12BULGE
            Y1 = COS(PHI(I-1))*B21D(I) + SIN(PHI(I-1))*B22E(I-1)
            Y2 = COS(PHI(I-1))*B21BULGE + SIN(PHI(I-1))*B22BULGE
*
            THETA(I) = ATAN2( SQRT(Y1**2+Y2**2), SQRT(X1**2+X2**2) )
*
*           Determine if there are bulges to chase or if a new direct
*           summand has been reached
*
            RESTART11 =   B11D(I)**2 + B11BULGE**2 .LE. THRESH**2
            RESTART12 = B12E(I-1)**2 + B12BULGE**2 .LE. THRESH**2
            RESTART21 =   B21D(I)**2 + B21BULGE**2 .LE. THRESH**2
            RESTART22 = B22E(I-1)**2 + B22BULGE**2 .LE. THRESH**2
*
*           If possible, chase bulges from B11(I+1,I), B12(I+1,I-1),
*           B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge-
*           chasing by applying the original shift again.
*
            IF( .NOT. RESTART11 .AND. .NOT. RESTART12 ) THEN
               CALL DLARTGP( X2, X1, RWORK(IU1SN+I-1),
     $                       RWORK(IU1CS+I-1), R )
            ELSE IF( .NOT. RESTART11 .AND. RESTART12 ) THEN
               CALL DLARTGP( B11BULGE, B11D(I), RWORK(IU1SN+I-1),
     $                       RWORK(IU1CS+I-1), R )
            ELSE IF( RESTART11 .AND. .NOT. RESTART12 ) THEN
               CALL DLARTGP( B12BULGE, B12E(I-1), RWORK(IU1SN+I-1),
     $                       RWORK(IU1CS+I-1), R )
            ELSE IF( MU .LE. NU ) THEN
               CALL DLARTGS( B11E(I), B11D(I+1), MU, RWORK(IU1CS+I-1),
     $                       RWORK(IU1SN+I-1) )
            ELSE
               CALL DLARTGS( B12D(I), B12E(I), NU, RWORK(IU1CS+I-1),
     $                       RWORK(IU1SN+I-1) )
            END IF
            IF( .NOT. RESTART21 .AND. .NOT. RESTART22 ) THEN
               CALL DLARTGP( Y2, Y1, RWORK(IU2SN+I-1),
     $                       RWORK(IU2CS+I-1), R )
            ELSE IF( .NOT. RESTART21 .AND. RESTART22 ) THEN
               CALL DLARTGP( B21BULGE, B21D(I), RWORK(IU2SN+I-1),
     $                       RWORK(IU2CS+I-1), R )
            ELSE IF( RESTART21 .AND. .NOT. RESTART22 ) THEN
               CALL DLARTGP( B22BULGE, B22E(I-1), RWORK(IU2SN+I-1),
     $                       RWORK(IU2CS+I-1), R )
            ELSE IF( NU .LT. MU ) THEN
*
*              Restart on the pair ( B21E(I), B21D(I+1) ), matching
*              the B11 branch above and the first-step call that uses
*              ( B21E(IMIN), B21D(IMIN+1) ).  B21E(I+1) would lie
*              outside B21E(1:Q-1) when I = Q-1.
*
               CALL DLARTGS( B21E(I), B21D(I+1), NU, RWORK(IU2CS+I-1),
     $                       RWORK(IU2SN+I-1) )
            ELSE
               CALL DLARTGS( B22D(I), B22E(I), MU, RWORK(IU2CS+I-1),
     $                       RWORK(IU2SN+I-1) )
            END IF
            RWORK(IU2CS+I-1) = -RWORK(IU2CS+I-1)
            RWORK(IU2SN+I-1) = -RWORK(IU2SN+I-1)
*
            TEMP = RWORK(IU1CS+I-1)*B11E(I) +
     $             RWORK(IU1SN+I-1)*B11D(I+1)
            B11D(I+1) = RWORK(IU1CS+I-1)*B11D(I+1) -
     $                  RWORK(IU1SN+I-1)*B11E(I)
            B11E(I) = TEMP
            IF( I .LT. IMAX - 1 ) THEN
               B11BULGE = RWORK(IU1SN+I-1)*B11E(I+1)
               B11E(I+1) = RWORK(IU1CS+I-1)*B11E(I+1)
            END IF
            TEMP = RWORK(IU2CS+I-1)*B21E(I) +
     $             RWORK(IU2SN+I-1)*B21D(I+1)
            B21D(I+1) = RWORK(IU2CS+I-1)*B21D(I+1) -
     $                  RWORK(IU2SN+I-1)*B21E(I)
            B21E(I) = TEMP
            IF( I .LT. IMAX - 1 ) THEN
               B21BULGE = RWORK(IU2SN+I-1)*B21E(I+1)
               B21E(I+1) = RWORK(IU2CS+I-1)*B21E(I+1)
            END IF
            TEMP = RWORK(IU1CS+I-1)*B12D(I) +
     $             RWORK(IU1SN+I-1)*B12E(I)
            B12E(I) = RWORK(IU1CS+I-1)*B12E(I) -
     $                RWORK(IU1SN+I-1)*B12D(I)
            B12D(I) = TEMP
            B12BULGE = RWORK(IU1SN+I-1)*B12D(I+1)
            B12D(I+1) = RWORK(IU1CS+I-1)*B12D(I+1)
            TEMP = RWORK(IU2CS+I-1)*B22D(I) +
     $             RWORK(IU2SN+I-1)*B22E(I)
            B22E(I) = RWORK(IU2CS+I-1)*B22E(I) -
     $                RWORK(IU2SN+I-1)*B22D(I)
            B22D(I) = TEMP
            B22BULGE = RWORK(IU2SN+I-1)*B22D(I+1)
            B22D(I+1) = RWORK(IU2CS+I-1)*B22D(I+1)
*
         END DO
*
*        Compute PHI(IMAX-1)
*
         X1 = SIN(THETA(IMAX-1))*B11E(IMAX-1) +
     $        COS(THETA(IMAX-1))*B21E(IMAX-1)
         Y1 = SIN(THETA(IMAX-1))*B12D(IMAX-1) +
     $        COS(THETA(IMAX-1))*B22D(IMAX-1)
         Y2 = SIN(THETA(IMAX-1))*B12BULGE +
     $        COS(THETA(IMAX-1))*B22BULGE
*
         PHI(IMAX-1) = ATAN2( ABS(X1), SQRT(Y1**2+Y2**2) )
*
*        Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX)
*
         RESTART12 = B12D(IMAX-1)**2 + B12BULGE**2 .LE. THRESH**2
         RESTART22 = B22D(IMAX-1)**2 + B22BULGE**2 .LE. THRESH**2
*
         IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN
            CALL DLARTGP( Y2, Y1, RWORK(IV2TSN+IMAX-1-1),
     $                    RWORK(IV2TCS+IMAX-1-1), R )
         ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN
            CALL DLARTGP( B12BULGE, B12D(IMAX-1),
     $                    RWORK(IV2TSN+IMAX-1-1),
     $                    RWORK(IV2TCS+IMAX-1-1), R )
         ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN
            CALL DLARTGP( B22BULGE, B22D(IMAX-1),
     $                    RWORK(IV2TSN+IMAX-1-1),
     $                    RWORK(IV2TCS+IMAX-1-1), R )
         ELSE IF( NU .LT. MU ) THEN
            CALL DLARTGS( B12E(IMAX-1), B12D(IMAX), NU,
     $                    RWORK(IV2TCS+IMAX-1-1),
     $                    RWORK(IV2TSN+IMAX-1-1) )
         ELSE
            CALL DLARTGS( B22E(IMAX-1), B22D(IMAX), MU,
     $                    RWORK(IV2TCS+IMAX-1-1),
     $                    RWORK(IV2TSN+IMAX-1-1) )
         END IF
*
         TEMP = RWORK(IV2TCS+IMAX-1-1)*B12E(IMAX-1) +
     $          RWORK(IV2TSN+IMAX-1-1)*B12D(IMAX)
         B12D(IMAX) = RWORK(IV2TCS+IMAX-1-1)*B12D(IMAX) -
     $                RWORK(IV2TSN+IMAX-1-1)*B12E(IMAX-1)
         B12E(IMAX-1) = TEMP
         TEMP = RWORK(IV2TCS+IMAX-1-1)*B22E(IMAX-1) +
     $          RWORK(IV2TSN+IMAX-1-1)*B22D(IMAX)
         B22D(IMAX) = RWORK(IV2TCS+IMAX-1-1)*B22D(IMAX) -
     $                RWORK(IV2TSN+IMAX-1-1)*B22E(IMAX-1)
         B22E(IMAX-1) = TEMP
*
*        Update singular vectors
*
         IF( WANTU1 ) THEN
            IF( COLMAJOR ) THEN
               CALL ZLASR( 'R', 'V', 'F', P, IMAX-IMIN+1,
     $                     RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1),
     $                     U1(1,IMIN), LDU1 )
            ELSE
               CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, P,
     $                     RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1),
     $                     U1(IMIN,1), LDU1 )
            END IF
         END IF
         IF( WANTU2 ) THEN
            IF( COLMAJOR ) THEN
               CALL ZLASR( 'R', 'V', 'F', M-P, IMAX-IMIN+1,
     $                     RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1),
     $                     U2(1,IMIN), LDU2 )
            ELSE
               CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-P,
     $                     RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1),
     $                     U2(IMIN,1), LDU2 )
            END IF
         END IF
         IF( WANTV1T ) THEN
            IF( COLMAJOR ) THEN
               CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, Q,
     $                     RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1),
     $                     V1T(IMIN,1), LDV1T )
            ELSE
               CALL ZLASR( 'R', 'V', 'F', Q, IMAX-IMIN+1,
     $                     RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1),
     $                     V1T(1,IMIN), LDV1T )
            END IF
         END IF
         IF( WANTV2T ) THEN
            IF( COLMAJOR ) THEN
               CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-Q,
     $                     RWORK(IV2TCS+IMIN-1), RWORK(IV2TSN+IMIN-1),
     $                     V2T(IMIN,1), LDV2T )
            ELSE
               CALL ZLASR( 'R', 'V', 'F', M-Q, IMAX-IMIN+1,
     $                     RWORK(IV2TCS+IMIN-1), RWORK(IV2TSN+IMIN-1),
     $                     V2T(1,IMIN), LDV2T )
            END IF
         END IF
*
*        Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX)
*
         IF( B11E(IMAX-1)+B21E(IMAX-1) .GT. ZERO ) THEN
            B11D(IMAX) = -B11D(IMAX)
            B21D(IMAX) = -B21D(IMAX)
            IF( WANTV1T ) THEN
               IF( COLMAJOR ) THEN
                  CALL ZSCAL( Q, NEGONECOMPLEX, V1T(IMAX,1), LDV1T )
               ELSE
                  CALL ZSCAL( Q, NEGONECOMPLEX, V1T(1,IMAX), 1 )
               END IF
            END IF
         END IF
*
*        Compute THETA(IMAX)
*
         X1 = COS(PHI(IMAX-1))*B11D(IMAX) +
     $        SIN(PHI(IMAX-1))*B12E(IMAX-1)
         Y1 = COS(PHI(IMAX-1))*B21D(IMAX) +
     $        SIN(PHI(IMAX-1))*B22E(IMAX-1)
*
         THETA(IMAX) = ATAN2( ABS(Y1), ABS(X1) )
*
*        Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX),
*        and B22(IMAX,IMAX-1)
*
         IF( B11D(IMAX)+B12E(IMAX-1) .LT. ZERO ) THEN
            B12D(IMAX) = -B12D(IMAX)
            IF( WANTU1 ) THEN
               IF( COLMAJOR ) THEN
                  CALL ZSCAL( P, NEGONECOMPLEX, U1(1,IMAX), 1 )
               ELSE
                  CALL ZSCAL( P, NEGONECOMPLEX, U1(IMAX,1), LDU1 )
               END IF
            END IF
         END IF
         IF( B21D(IMAX)+B22E(IMAX-1) .GT. ZERO ) THEN
            B22D(IMAX) = -B22D(IMAX)
            IF( WANTU2 ) THEN
               IF( COLMAJOR ) THEN
                  CALL ZSCAL( M-P, NEGONECOMPLEX, U2(1,IMAX), 1 )
               ELSE
                  CALL ZSCAL( M-P, NEGONECOMPLEX, U2(IMAX,1), LDU2 )
               END IF
            END IF
         END IF
*
*        Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX)
*
         IF( B12D(IMAX)+B22D(IMAX) .LT. ZERO ) THEN
            IF( WANTV2T ) THEN
               IF( COLMAJOR ) THEN
                  CALL ZSCAL( M-Q, NEGONECOMPLEX, V2T(IMAX,1), LDV2T )
               ELSE
                  CALL ZSCAL( M-Q, NEGONECOMPLEX, V2T(1,IMAX), 1 )
               END IF
            END IF
         END IF
*
*        Test for negligible sines or cosines
*
         DO I = IMIN, IMAX
            IF( THETA(I) .LT. THRESH ) THEN
               THETA(I) = ZERO
            ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN
               THETA(I) = PIOVER2
            END IF
         END DO
         DO I = IMIN, IMAX-1
            IF( PHI(I) .LT. THRESH ) THEN
               PHI(I) = ZERO
            ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN
               PHI(I) = PIOVER2
            END IF
         END DO
*
*        Deflate
*
         IF( IMAX .GT. 1 ) THEN
            DO WHILE( PHI(IMAX-1) .EQ. ZERO )
               IMAX = IMAX - 1
               IF( IMAX .LE. 1 ) EXIT
            END DO
         END IF
         IF( IMIN .GT. IMAX - 1 )
     $      IMIN = IMAX - 1
         IF( IMIN .GT. 1 ) THEN
            DO WHILE( PHI(IMIN-1) .NE. ZERO )
               IMIN = IMIN - 1
               IF( IMIN .LE. 1 ) EXIT
            END DO
         END IF
*
*        Repeat main iteration loop
*
      END DO
*
*     Postprocessing: order THETA from least to greatest (selection
*     sort), swapping the matching rows/columns of U1, U2, V1T, V2T.
*
      DO I = 1, Q
*
         MINI = I
         THETAMIN = THETA(I)
         DO J = I+1, Q
            IF( THETA(J) .LT. THETAMIN ) THEN
               MINI = J
               THETAMIN = THETA(J)
            END IF
         END DO
*
         IF( MINI .NE. I ) THEN
            THETA(MINI) = THETA(I)
            THETA(I) = THETAMIN
            IF( COLMAJOR ) THEN
               IF( WANTU1 )
     $            CALL ZSWAP( P, U1(1,I), 1, U1(1,MINI), 1 )
               IF( WANTU2 )
     $            CALL ZSWAP( M-P, U2(1,I), 1, U2(1,MINI), 1 )
               IF( WANTV1T )
     $            CALL ZSWAP( Q, V1T(I,1), LDV1T, V1T(MINI,1), LDV1T )
               IF( WANTV2T )
     $            CALL ZSWAP( M-Q, V2T(I,1), LDV2T, V2T(MINI,1),
     $               LDV2T )
            ELSE
               IF( WANTU1 )
     $            CALL ZSWAP( P, U1(I,1), LDU1, U1(MINI,1), LDU1 )
               IF( WANTU2 )
     $            CALL ZSWAP( M-P, U2(I,1), LDU2, U2(MINI,1), LDU2 )
               IF( WANTV1T )
     $            CALL ZSWAP( Q, V1T(1,I), 1, V1T(1,MINI), 1 )
               IF( WANTV2T )
     $            CALL ZSWAP( M-Q, V2T(1,I), 1, V2T(1,MINI), 1 )
            END IF
         END IF
*
      END DO
*
      RETURN
*
*     End of ZBBCSD
*
      END
921
2 $ THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T,
3 $ V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E,
4 $ B22D, B22E, RWORK, LRWORK, INFO )
5 IMPLICIT NONE
6 *
7 * -- LAPACK routine (version 3.3.0) --
8 *
9 * -- Contributed by Brian Sutton of the Randolph-Macon College --
10 * -- November 2010
11 *
12 * -- LAPACK is a software package provided by Univ. of Tennessee, --
13 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
14 *
15 * .. Scalar Arguments ..
16 CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS
17 INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LRWORK, M, P, Q
18 * ..
19 * .. Array Arguments ..
20 DOUBLE PRECISION B11D( * ), B11E( * ), B12D( * ), B12E( * ),
21 $ B21D( * ), B21E( * ), B22D( * ), B22E( * ),
22 $ PHI( * ), THETA( * ), RWORK( * )
23 COMPLEX*16 U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),
24 $ V2T( LDV2T, * )
25 * ..
26 *
27 * Purpose
28 * =======
29 *
30 * ZBBCSD computes the CS decomposition of a unitary matrix in
31 * bidiagonal-block form,
32 *
33 *
34 * [ B11 | B12 0 0 ]
35 * [ 0 | 0 -I 0 ]
36 * X = [----------------]
37 * [ B21 | B22 0 0 ]
38 * [ 0 | 0 0 I ]
39 *
40 * [ C | -S 0 0 ]
41 * [ U1 | ] [ 0 | 0 -I 0 ] [ V1 | ]**H
42 * = [---------] [---------------] [---------] .
43 * [ | U2 ] [ S | C 0 0 ] [ | V2 ]
44 * [ 0 | 0 0 I ]
45 *
46 * X is M-by-M, its top-left block is P-by-Q, and Q must be no larger
47 * than P, M-P, or M-Q. (If Q is not the smallest index, then X must be
48 * transposed and/or permuted. This can be done in constant time using
49 * the TRANS and SIGNS options. See ZUNCSD for details.)
50 *
51 * The bidiagonal matrices B11, B12, B21, and B22 are represented
52 * implicitly by angles THETA(1:Q) and PHI(1:Q-1).
53 *
54 * The unitary matrices U1, U2, V1T, and V2T are input/output.
55 * The input matrices are pre- or post-multiplied by the appropriate
56 * singular vector matrices.
57 *
58 * Arguments
59 * =========
60 *
61 * JOBU1 (input) CHARACTER
62 * = 'Y': U1 is updated;
63 * otherwise: U1 is not updated.
64 *
65 * JOBU2 (input) CHARACTER
66 * = 'Y': U2 is updated;
67 * otherwise: U2 is not updated.
68 *
69 * JOBV1T (input) CHARACTER
70 * = 'Y': V1T is updated;
71 * otherwise: V1T is not updated.
72 *
73 * JOBV2T (input) CHARACTER
74 * = 'Y': V2T is updated;
75 * otherwise: V2T is not updated.
76 *
77 * TRANS (input) CHARACTER
78 * = 'T': X, U1, U2, V1T, and V2T are stored in row-major
79 * order;
80 * otherwise: X, U1, U2, V1T, and V2T are stored in column-
81 * major order.
82 *
83 * M (input) INTEGER
84 * The number of rows and columns in X, the unitary matrix in
85 * bidiagonal-block form.
86 *
87 * P (input) INTEGER
88 * The number of rows in the top-left block of X. 0 <= P <= M.
89 *
90 * Q (input) INTEGER
91 * The number of columns in the top-left block of X.
92 * 0 <= Q <= MIN(P,M-P,M-Q).
93 *
94 * THETA (input/output) DOUBLE PRECISION array, dimension (Q)
95 * On entry, the angles THETA(1),...,THETA(Q) that, along with
96 * PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block
97 * form. On exit, the angles whose cosines and sines define the
98 * diagonal blocks in the CS decomposition.
99 *
100 * PHI (input/workspace) DOUBLE PRECISION array, dimension (Q-1)
101 * The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),...,
102 * THETA(Q), define the matrix in bidiagonal-block form.
103 *
104 * U1 (input/output) COMPLEX*16 array, dimension (LDU1,P)
105 * On entry, an LDU1-by-P matrix. On exit, U1 is postmultiplied
106 * by the left singular vector matrix common to [ B11 ; 0 ] and
107 * [ B12 0 0 ; 0 -I 0 ].
108 *
109 * LDU1 (input) INTEGER
110 * The leading dimension of the array U1.
111 *
112 * U2 (input/output) COMPLEX*16 array, dimension (LDU2,M-P)
113 * On entry, an LDU2-by-(M-P) matrix. On exit, U2 is
114 * postmultiplied by the left singular vector matrix common to
115 * [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ].
116 *
117 * LDU2 (input) INTEGER
118 * The leading dimension of the array U2.
119 *
120 * V1T (input/output) COMPLEX*16 array, dimension (LDV1T,Q)
121 * On entry, a LDV1T-by-Q matrix. On exit, V1T is premultiplied
122 * by the conjugate transpose of the right singular vector
123 * matrix common to [ B11 ; 0 ] and [ B21 ; 0 ].
124 *
125 * LDV1T (input) INTEGER
126 * The leading dimension of the array V1T.
127 *
128 * V2T (input/output) COMPLEX*16 array, dimension (LDV2T,M-Q)
129 * On entry, a LDV2T-by-(M-Q) matrix. On exit, V2T is
130 * premultiplied by the conjugate transpose of the right
131 * singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and
132 * [ B22 0 0 ; 0 0 I ].
133 *
134 * LDV2T (input) INTEGER
135 * The leading dimension of the array V2T.
136 *
137 * B11D (output) DOUBLE PRECISION array, dimension (Q)
138 * When ZBBCSD converges, B11D contains the cosines of THETA(1),
139 * ..., THETA(Q). If ZBBCSD fails to converge, then B11D
140 * contains the diagonal of the partially reduced top-left
141 * block.
142 *
143 * B11E (output) DOUBLE PRECISION array, dimension (Q-1)
144 * When ZBBCSD converges, B11E contains zeros. If ZBBCSD fails
145 * to converge, then B11E contains the superdiagonal of the
146 * partially reduced top-left block.
147 *
148 * B12D (output) DOUBLE PRECISION array, dimension (Q)
149 * When ZBBCSD converges, B12D contains the negative sines of
150 * THETA(1), ..., THETA(Q). If ZBBCSD fails to converge, then
151 * B12D contains the diagonal of the partially reduced top-right
152 * block.
153 *
154 * B12E (output) DOUBLE PRECISION array, dimension (Q-1)
155 * When ZBBCSD converges, B12E contains zeros. If ZBBCSD fails
156 * to converge, then B12E contains the subdiagonal of the
157 * partially reduced top-right block.
158 *
159 * RWORK (workspace) DOUBLE PRECISION array, dimension (MAX(1,LRWORK))
160 * On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
161 *
162 * LRWORK (input) INTEGER
163 * The dimension of the array RWORK. LRWORK >= MAX(1,8*Q).
164 *
165 * If LRWORK = -1, then a workspace query is assumed; the
166 * routine only calculates the optimal size of the RWORK array,
167 * returns this value as the first entry of the RWORK array, and
168 * no error message related to LRWORK is issued by XERBLA.
169 *
170 * INFO (output) INTEGER
171 * = 0: successful exit.
172 * < 0: if INFO = -i, the i-th argument had an illegal value.
173 * > 0: if ZBBCSD did not converge, INFO specifies the number
174 * of nonzero entries in PHI, and B11D, B11E, etc.,
175 * contain the partially reduced matrix.
176 *
177 * Reference
178 * =========
179 *
180 * [1] Brian D. Sutton. Computing the complete CS decomposition. Numer.
181 * Algorithms, 50(1):33-65, 2009.
182 *
183 * Internal Parameters
184 * ===================
185 *
186 * TOLMUL DOUBLE PRECISION, default = MAX(10,MIN(100,EPS**(-1/8)))
187 * TOLMUL controls the convergence criterion of the QR loop.
188 * Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they
189 * are within TOLMUL*EPS of either bound.
190 *
191 * ===================================================================
192 *
193 * .. Parameters ..
194 INTEGER MAXITR
195 PARAMETER ( MAXITR = 6 )
196 DOUBLE PRECISION HUNDRED, MEIGHTH, ONE, PIOVER2, TEN, ZERO
197 PARAMETER ( HUNDRED = 100.0D0, MEIGHTH = -0.125D0,
198 $ ONE = 1.0D0, PIOVER2 = 1.57079632679489662D0,
199 $ TEN = 10.0D0, ZERO = 0.0D0 )
200 COMPLEX*16 NEGONECOMPLEX
201 PARAMETER ( NEGONECOMPLEX = (-1.0D0,0.0D0) )
202 * ..
203 * .. Local Scalars ..
204 LOGICAL COLMAJOR, LQUERY, RESTART11, RESTART12,
205 $ RESTART21, RESTART22, WANTU1, WANTU2, WANTV1T,
206 $ WANTV2T
207 INTEGER I, IMIN, IMAX, ITER, IU1CS, IU1SN, IU2CS,
208 $ IU2SN, IV1TCS, IV1TSN, IV2TCS, IV2TSN, J,
209 $ LRWORKMIN, LRWORKOPT, MAXIT, MINI
210 DOUBLE PRECISION B11BULGE, B12BULGE, B21BULGE, B22BULGE, DUMMY,
211 $ EPS, MU, NU, R, SIGMA11, SIGMA21,
212 $ TEMP, THETAMAX, THETAMIN, THRESH, TOL, TOLMUL,
213 $ UNFL, X1, X2, Y1, Y2
214 *
215 EXTERNAL DLARTGP, DLARTGS, DLAS2, XERBLA, ZLASR, ZSCAL,
216 $ ZSWAP
217 * ..
218 * .. External Functions ..
219 DOUBLE PRECISION DLAMCH
220 LOGICAL LSAME
221 EXTERNAL LSAME, DLAMCH
222 * ..
223 * .. Intrinsic Functions ..
224 INTRINSIC ABS, ATAN2, COS, MAX, MIN, SIN, SQRT
225 * ..
226 * .. Executable Statements ..
227 *
228 * Test input arguments
229 *
230 INFO = 0
231 LQUERY = LRWORK .EQ. -1
232 WANTU1 = LSAME( JOBU1, 'Y' )
233 WANTU2 = LSAME( JOBU2, 'Y' )
234 WANTV1T = LSAME( JOBV1T, 'Y' )
235 WANTV2T = LSAME( JOBV2T, 'Y' )
236 COLMAJOR = .NOT. LSAME( TRANS, 'T' )
237 *
238 IF( M .LT. 0 ) THEN
239 INFO = -6
240 ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN
241 INFO = -7
242 ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN
243 INFO = -8
244 ELSE IF( Q .GT. P .OR. Q .GT. M-P .OR. Q .GT. M-Q ) THEN
245 INFO = -8
246 ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN
247 INFO = -12
248 ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN
249 INFO = -14
250 ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN
251 INFO = -16
252 ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN
253 INFO = -18
254 END IF
255 *
256 * Quick return if Q = 0
257 *
258 IF( INFO .EQ. 0 .AND. Q .EQ. 0 ) THEN
259 LRWORKMIN = 1
260 RWORK(1) = LRWORKMIN
261 RETURN
262 END IF
263 *
264 * Compute workspace
265 *
266 IF( INFO .EQ. 0 ) THEN
267 IU1CS = 1
268 IU1SN = IU1CS + Q
269 IU2CS = IU1SN + Q
270 IU2SN = IU2CS + Q
271 IV1TCS = IU2SN + Q
272 IV1TSN = IV1TCS + Q
273 IV2TCS = IV1TSN + Q
274 IV2TSN = IV2TCS + Q
275 LRWORKOPT = IV2TSN + Q - 1
276 LRWORKMIN = LRWORKOPT
277 RWORK(1) = LRWORKOPT
278 IF( LRWORK .LT. LRWORKMIN .AND. .NOT. LQUERY ) THEN
279 INFO = -28
280 END IF
281 END IF
282 *
283 IF( INFO .NE. 0 ) THEN
284 CALL XERBLA( 'ZBBCSD', -INFO )
285 RETURN
286 ELSE IF( LQUERY ) THEN
287 RETURN
288 END IF
289 *
290 * Get machine constants
291 *
292 EPS = DLAMCH( 'Epsilon' )
293 UNFL = DLAMCH( 'Safe minimum' )
294 TOLMUL = MAX( TEN, MIN( HUNDRED, EPS**MEIGHTH ) )
295 TOL = TOLMUL*EPS
296 THRESH = MAX( TOL, MAXITR*Q*Q*UNFL )
297 *
298 * Test for negligible sines or cosines
299 *
300 DO I = 1, Q
301 IF( THETA(I) .LT. THRESH ) THEN
302 THETA(I) = ZERO
303 ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN
304 THETA(I) = PIOVER2
305 END IF
306 END DO
307 DO I = 1, Q-1
308 IF( PHI(I) .LT. THRESH ) THEN
309 PHI(I) = ZERO
310 ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN
311 PHI(I) = PIOVER2
312 END IF
313 END DO
314 *
315 * Initial deflation
316 *
317 IMAX = Q
318 DO WHILE( ( IMAX .GT. 1 ) .AND. ( PHI(IMAX-1) .EQ. ZERO ) )
319 IMAX = IMAX - 1
320 END DO
321 IMIN = IMAX - 1
322 IF ( IMIN .GT. 1 ) THEN
323 DO WHILE( PHI(IMIN-1) .NE. ZERO )
324 IMIN = IMIN - 1
325 IF ( IMIN .LE. 1 ) EXIT
326 END DO
327 END IF
328 *
329 * Initialize iteration counter
330 *
331 MAXIT = MAXITR*Q*Q
332 ITER = 0
333 *
334 * Begin main iteration loop
335 *
336 DO WHILE( IMAX .GT. 1 )
337 *
338 * Compute the matrix entries
339 *
340 B11D(IMIN) = COS( THETA(IMIN) )
341 B21D(IMIN) = -SIN( THETA(IMIN) )
342 DO I = IMIN, IMAX - 1
343 B11E(I) = -SIN( THETA(I) ) * SIN( PHI(I) )
344 B11D(I+1) = COS( THETA(I+1) ) * COS( PHI(I) )
345 B12D(I) = SIN( THETA(I) ) * COS( PHI(I) )
346 B12E(I) = COS( THETA(I+1) ) * SIN( PHI(I) )
347 B21E(I) = -COS( THETA(I) ) * SIN( PHI(I) )
348 B21D(I+1) = -SIN( THETA(I+1) ) * COS( PHI(I) )
349 B22D(I) = COS( THETA(I) ) * COS( PHI(I) )
350 B22E(I) = -SIN( THETA(I+1) ) * SIN( PHI(I) )
351 END DO
352 B12D(IMAX) = SIN( THETA(IMAX) )
353 B22D(IMAX) = COS( THETA(IMAX) )
354 *
355 * Abort if not converging; otherwise, increment ITER
356 *
357 IF( ITER .GT. MAXIT ) THEN
358 INFO = 0
359 DO I = 1, Q
360 IF( PHI(I) .NE. ZERO )
361 $ INFO = INFO + 1
362 END DO
363 RETURN
364 END IF
365 *
366 ITER = ITER + IMAX - IMIN
367 *
368 * Compute shifts
369 *
370 THETAMAX = THETA(IMIN)
371 THETAMIN = THETA(IMIN)
372 DO I = IMIN+1, IMAX
373 IF( THETA(I) > THETAMAX )
374 $ THETAMAX = THETA(I)
375 IF( THETA(I) < THETAMIN )
376 $ THETAMIN = THETA(I)
377 END DO
378 *
379 IF( THETAMAX .GT. PIOVER2 - THRESH ) THEN
380 *
381 * Zero on diagonals of B11 and B22; induce deflation with a
382 * zero shift
383 *
384 MU = ZERO
385 NU = ONE
386 *
387 ELSE IF( THETAMIN .LT. THRESH ) THEN
388 *
389 * Zero on diagonals of B12 and B21; induce deflation with a
390 * zero shift
391 *
392 MU = ONE
393 NU = ZERO
394 *
395 ELSE
396 *
397 * Compute shifts for B11 and B21 and use the lesser
398 *
399 CALL DLAS2( B11D(IMAX-1), B11E(IMAX-1), B11D(IMAX), SIGMA11,
400 $ DUMMY )
401 CALL DLAS2( B21D(IMAX-1), B21E(IMAX-1), B21D(IMAX), SIGMA21,
402 $ DUMMY )
403 *
404 IF( SIGMA11 .LE. SIGMA21 ) THEN
405 MU = SIGMA11
406 NU = SQRT( ONE - MU**2 )
407 IF( MU .LT. THRESH ) THEN
408 MU = ZERO
409 NU = ONE
410 END IF
411 ELSE
412 NU = SIGMA21
413 MU = SQRT( 1.0 - NU**2 )
414 IF( NU .LT. THRESH ) THEN
415 MU = ONE
416 NU = ZERO
417 END IF
418 END IF
419 END IF
420 *
421 * Rotate to produce bulges in B11 and B21
422 *
423 IF( MU .LE. NU ) THEN
424 CALL DLARTGS( B11D(IMIN), B11E(IMIN), MU,
425 $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1) )
426 ELSE
427 CALL DLARTGS( B21D(IMIN), B21E(IMIN), NU,
428 $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1) )
429 END IF
430 *
431 TEMP = RWORK(IV1TCS+IMIN-1)*B11D(IMIN) +
432 $ RWORK(IV1TSN+IMIN-1)*B11E(IMIN)
433 B11E(IMIN) = RWORK(IV1TCS+IMIN-1)*B11E(IMIN) -
434 $ RWORK(IV1TSN+IMIN-1)*B11D(IMIN)
435 B11D(IMIN) = TEMP
436 B11BULGE = RWORK(IV1TSN+IMIN-1)*B11D(IMIN+1)
437 B11D(IMIN+1) = RWORK(IV1TCS+IMIN-1)*B11D(IMIN+1)
438 TEMP = RWORK(IV1TCS+IMIN-1)*B21D(IMIN) +
439 $ RWORK(IV1TSN+IMIN-1)*B21E(IMIN)
440 B21E(IMIN) = RWORK(IV1TCS+IMIN-1)*B21E(IMIN) -
441 $ RWORK(IV1TSN+IMIN-1)*B21D(IMIN)
442 B21D(IMIN) = TEMP
443 B21BULGE = RWORK(IV1TSN+IMIN-1)*B21D(IMIN+1)
444 B21D(IMIN+1) = RWORK(IV1TCS+IMIN-1)*B21D(IMIN+1)
445 *
446 * Compute THETA(IMIN)
447 *
448 THETA( IMIN ) = ATAN2( SQRT( B21D(IMIN)**2+B21BULGE**2 ),
449 $ SQRT( B11D(IMIN)**2+B11BULGE**2 ) )
450 *
451 * Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN)
452 *
453 IF( B11D(IMIN)**2+B11BULGE**2 .GT. THRESH**2 ) THEN
454 CALL DLARTGP( B11BULGE, B11D(IMIN), RWORK(IU1SN+IMIN-1),
455 $ RWORK(IU1CS+IMIN-1), R )
456 ELSE IF( MU .LE. NU ) THEN
457 CALL DLARTGS( B11E( IMIN ), B11D( IMIN + 1 ), MU,
458 $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1) )
459 ELSE
460 CALL DLARTGS( B12D( IMIN ), B12E( IMIN ), NU,
461 $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1) )
462 END IF
463 IF( B21D(IMIN)**2+B21BULGE**2 .GT. THRESH**2 ) THEN
464 CALL DLARTGP( B21BULGE, B21D(IMIN), RWORK(IU2SN+IMIN-1),
465 $ RWORK(IU2CS+IMIN-1), R )
466 ELSE IF( NU .LT. MU ) THEN
467 CALL DLARTGS( B21E( IMIN ), B21D( IMIN + 1 ), NU,
468 $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1) )
469 ELSE
470 CALL DLARTGS( B22D(IMIN), B22E(IMIN), MU,
471 $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1) )
472 END IF
473 RWORK(IU2CS+IMIN-1) = -RWORK(IU2CS+IMIN-1)
474 RWORK(IU2SN+IMIN-1) = -RWORK(IU2SN+IMIN-1)
475 *
476 TEMP = RWORK(IU1CS+IMIN-1)*B11E(IMIN) +
477 $ RWORK(IU1SN+IMIN-1)*B11D(IMIN+1)
478 B11D(IMIN+1) = RWORK(IU1CS+IMIN-1)*B11D(IMIN+1) -
479 $ RWORK(IU1SN+IMIN-1)*B11E(IMIN)
480 B11E(IMIN) = TEMP
481 IF( IMAX .GT. IMIN+1 ) THEN
482 B11BULGE = RWORK(IU1SN+IMIN-1)*B11E(IMIN+1)
483 B11E(IMIN+1) = RWORK(IU1CS+IMIN-1)*B11E(IMIN+1)
484 END IF
485 TEMP = RWORK(IU1CS+IMIN-1)*B12D(IMIN) +
486 $ RWORK(IU1SN+IMIN-1)*B12E(IMIN)
487 B12E(IMIN) = RWORK(IU1CS+IMIN-1)*B12E(IMIN) -
488 $ RWORK(IU1SN+IMIN-1)*B12D(IMIN)
489 B12D(IMIN) = TEMP
490 B12BULGE = RWORK(IU1SN+IMIN-1)*B12D(IMIN+1)
491 B12D(IMIN+1) = RWORK(IU1CS+IMIN-1)*B12D(IMIN+1)
492 TEMP = RWORK(IU2CS+IMIN-1)*B21E(IMIN) +
493 $ RWORK(IU2SN+IMIN-1)*B21D(IMIN+1)
494 B21D(IMIN+1) = RWORK(IU2CS+IMIN-1)*B21D(IMIN+1) -
495 $ RWORK(IU2SN+IMIN-1)*B21E(IMIN)
496 B21E(IMIN) = TEMP
497 IF( IMAX .GT. IMIN+1 ) THEN
498 B21BULGE = RWORK(IU2SN+IMIN-1)*B21E(IMIN+1)
499 B21E(IMIN+1) = RWORK(IU2CS+IMIN-1)*B21E(IMIN+1)
500 END IF
501 TEMP = RWORK(IU2CS+IMIN-1)*B22D(IMIN) +
502 $ RWORK(IU2SN+IMIN-1)*B22E(IMIN)
503 B22E(IMIN) = RWORK(IU2CS+IMIN-1)*B22E(IMIN) -
504 $ RWORK(IU2SN+IMIN-1)*B22D(IMIN)
505 B22D(IMIN) = TEMP
506 B22BULGE = RWORK(IU2SN+IMIN-1)*B22D(IMIN+1)
507 B22D(IMIN+1) = RWORK(IU2CS+IMIN-1)*B22D(IMIN+1)
508 *
509 * Inner loop: chase bulges from B11(IMIN,IMIN+2),
510 * B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to
511 * bottom-right
512 *
513 DO I = IMIN+1, IMAX-1
514 *
515 * Compute PHI(I-1)
516 *
517 X1 = SIN(THETA(I-1))*B11E(I-1) + COS(THETA(I-1))*B21E(I-1)
518 X2 = SIN(THETA(I-1))*B11BULGE + COS(THETA(I-1))*B21BULGE
519 Y1 = SIN(THETA(I-1))*B12D(I-1) + COS(THETA(I-1))*B22D(I-1)
520 Y2 = SIN(THETA(I-1))*B12BULGE + COS(THETA(I-1))*B22BULGE
521 *
522 PHI(I-1) = ATAN2( SQRT(X1**2+X2**2), SQRT(Y1**2+Y2**2) )
523 *
524 * Determine if there are bulges to chase or if a new direct
525 * summand has been reached
526 *
527 RESTART11 = B11E(I-1)**2 + B11BULGE**2 .LE. THRESH**2
528 RESTART21 = B21E(I-1)**2 + B21BULGE**2 .LE. THRESH**2
529 RESTART12 = B12D(I-1)**2 + B12BULGE**2 .LE. THRESH**2
530 RESTART22 = B22D(I-1)**2 + B22BULGE**2 .LE. THRESH**2
531 *
532 * If possible, chase bulges from B11(I-1,I+1), B12(I-1,I),
533 * B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge-
534 * chasing by applying the original shift again.
535 *
536 IF( .NOT. RESTART11 .AND. .NOT. RESTART21 ) THEN
537 CALL DLARTGP( X2, X1, RWORK(IV1TSN+I-1),
538 $ RWORK(IV1TCS+I-1), R )
539 ELSE IF( .NOT. RESTART11 .AND. RESTART21 ) THEN
540 CALL DLARTGP( B11BULGE, B11E(I-1), RWORK(IV1TSN+I-1),
541 $ RWORK(IV1TCS+I-1), R )
542 ELSE IF( RESTART11 .AND. .NOT. RESTART21 ) THEN
543 CALL DLARTGP( B21BULGE, B21E(I-1), RWORK(IV1TSN+I-1),
544 $ RWORK(IV1TCS+I-1), R )
545 ELSE IF( MU .LE. NU ) THEN
546 CALL DLARTGS( B11D(I), B11E(I), MU, RWORK(IV1TCS+I-1),
547 $ RWORK(IV1TSN+I-1) )
548 ELSE
549 CALL DLARTGS( B21D(I), B21E(I), NU, RWORK(IV1TCS+I-1),
550 $ RWORK(IV1TSN+I-1) )
551 END IF
552 RWORK(IV1TCS+I-1) = -RWORK(IV1TCS+I-1)
553 RWORK(IV1TSN+I-1) = -RWORK(IV1TSN+I-1)
554 IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN
555 CALL DLARTGP( Y2, Y1, RWORK(IV2TSN+I-1-1),
556 $ RWORK(IV2TCS+I-1-1), R )
557 ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN
558 CALL DLARTGP( B12BULGE, B12D(I-1), RWORK(IV2TSN+I-1-1),
559 $ RWORK(IV2TCS+I-1-1), R )
560 ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN
561 CALL DLARTGP( B22BULGE, B22D(I-1), RWORK(IV2TSN+I-1-1),
562 $ RWORK(IV2TCS+I-1-1), R )
563 ELSE IF( NU .LT. MU ) THEN
564 CALL DLARTGS( B12E(I-1), B12D(I), NU,
565 $ RWORK(IV2TCS+I-1-1), RWORK(IV2TSN+I-1-1) )
566 ELSE
567 CALL DLARTGS( B22E(I-1), B22D(I), MU,
568 $ RWORK(IV2TCS+I-1-1), RWORK(IV2TSN+I-1-1) )
569 END IF
570 *
571 TEMP = RWORK(IV1TCS+I-1)*B11D(I) + RWORK(IV1TSN+I-1)*B11E(I)
572 B11E(I) = RWORK(IV1TCS+I-1)*B11E(I) -
573 $ RWORK(IV1TSN+I-1)*B11D(I)
574 B11D(I) = TEMP
575 B11BULGE = RWORK(IV1TSN+I-1)*B11D(I+1)
576 B11D(I+1) = RWORK(IV1TCS+I-1)*B11D(I+1)
577 TEMP = RWORK(IV1TCS+I-1)*B21D(I) + RWORK(IV1TSN+I-1)*B21E(I)
578 B21E(I) = RWORK(IV1TCS+I-1)*B21E(I) -
579 $ RWORK(IV1TSN+I-1)*B21D(I)
580 B21D(I) = TEMP
581 B21BULGE = RWORK(IV1TSN+I-1)*B21D(I+1)
582 B21D(I+1) = RWORK(IV1TCS+I-1)*B21D(I+1)
583 TEMP = RWORK(IV2TCS+I-1-1)*B12E(I-1) +
584 $ RWORK(IV2TSN+I-1-1)*B12D(I)
585 B12D(I) = RWORK(IV2TCS+I-1-1)*B12D(I) -
586 $ RWORK(IV2TSN+I-1-1)*B12E(I-1)
587 B12E(I-1) = TEMP
588 B12BULGE = RWORK(IV2TSN+I-1-1)*B12E(I)
589 B12E(I) = RWORK(IV2TCS+I-1-1)*B12E(I)
590 TEMP = RWORK(IV2TCS+I-1-1)*B22E(I-1) +
591 $ RWORK(IV2TSN+I-1-1)*B22D(I)
592 B22D(I) = RWORK(IV2TCS+I-1-1)*B22D(I) -
593 $ RWORK(IV2TSN+I-1-1)*B22E(I-1)
594 B22E(I-1) = TEMP
595 B22BULGE = RWORK(IV2TSN+I-1-1)*B22E(I)
596 B22E(I) = RWORK(IV2TCS+I-1-1)*B22E(I)
597 *
598 * Compute THETA(I)
599 *
600 X1 = COS(PHI(I-1))*B11D(I) + SIN(PHI(I-1))*B12E(I-1)
601 X2 = COS(PHI(I-1))*B11BULGE + SIN(PHI(I-1))*B12BULGE
602 Y1 = COS(PHI(I-1))*B21D(I) + SIN(PHI(I-1))*B22E(I-1)
603 Y2 = COS(PHI(I-1))*B21BULGE + SIN(PHI(I-1))*B22BULGE
604 *
605 THETA(I) = ATAN2( SQRT(Y1**2+Y2**2), SQRT(X1**2+X2**2) )
606 *
607 * Determine if there are bulges to chase or if a new direct
608 * summand has been reached
609 *
610 RESTART11 = B11D(I)**2 + B11BULGE**2 .LE. THRESH**2
611 RESTART12 = B12E(I-1)**2 + B12BULGE**2 .LE. THRESH**2
612 RESTART21 = B21D(I)**2 + B21BULGE**2 .LE. THRESH**2
613 RESTART22 = B22E(I-1)**2 + B22BULGE**2 .LE. THRESH**2
614 *
615 * If possible, chase bulges from B11(I+1,I), B12(I+1,I-1),
616 * B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge-
617 * chasing by applying the original shift again.
618 *
619 IF( .NOT. RESTART11 .AND. .NOT. RESTART12 ) THEN
620 CALL DLARTGP( X2, X1, RWORK(IU1SN+I-1), RWORK(IU1CS+I-1),
621 $ R )
622 ELSE IF( .NOT. RESTART11 .AND. RESTART12 ) THEN
623 CALL DLARTGP( B11BULGE, B11D(I), RWORK(IU1SN+I-1),
624 $ RWORK(IU1CS+I-1), R )
625 ELSE IF( RESTART11 .AND. .NOT. RESTART12 ) THEN
626 CALL DLARTGP( B12BULGE, B12E(I-1), RWORK(IU1SN+I-1),
627 $ RWORK(IU1CS+I-1), R )
628 ELSE IF( MU .LE. NU ) THEN
629 CALL DLARTGS( B11E(I), B11D(I+1), MU, RWORK(IU1CS+I-1),
630 $ RWORK(IU1SN+I-1) )
631 ELSE
632 CALL DLARTGS( B12D(I), B12E(I), NU, RWORK(IU1CS+I-1),
633 $ RWORK(IU1SN+I-1) )
634 END IF
635 IF( .NOT. RESTART21 .AND. .NOT. RESTART22 ) THEN
636 CALL DLARTGP( Y2, Y1, RWORK(IU2SN+I-1), RWORK(IU2CS+I-1),
637 $ R )
638 ELSE IF( .NOT. RESTART21 .AND. RESTART22 ) THEN
639 CALL DLARTGP( B21BULGE, B21D(I), RWORK(IU2SN+I-1),
640 $ RWORK(IU2CS+I-1), R )
641 ELSE IF( RESTART21 .AND. .NOT. RESTART22 ) THEN
642 CALL DLARTGP( B22BULGE, B22E(I-1), RWORK(IU2SN+I-1),
643 $ RWORK(IU2CS+I-1), R )
644 ELSE IF( NU .LT. MU ) THEN
645 CALL DLARTGS( B21E(I), B21E(I+1), NU, RWORK(IU2CS+I-1),
646 $ RWORK(IU2SN+I-1) )
647 ELSE
648 CALL DLARTGS( B22D(I), B22E(I), MU, RWORK(IU2CS+I-1),
649 $ RWORK(IU2SN+I-1) )
650 END IF
651 RWORK(IU2CS+I-1) = -RWORK(IU2CS+I-1)
652 RWORK(IU2SN+I-1) = -RWORK(IU2SN+I-1)
653 *
654 TEMP = RWORK(IU1CS+I-1)*B11E(I) + RWORK(IU1SN+I-1)*B11D(I+1)
655 B11D(I+1) = RWORK(IU1CS+I-1)*B11D(I+1) -
656 $ RWORK(IU1SN+I-1)*B11E(I)
657 B11E(I) = TEMP
658 IF( I .LT. IMAX - 1 ) THEN
659 B11BULGE = RWORK(IU1SN+I-1)*B11E(I+1)
660 B11E(I+1) = RWORK(IU1CS+I-1)*B11E(I+1)
661 END IF
662 TEMP = RWORK(IU2CS+I-1)*B21E(I) + RWORK(IU2SN+I-1)*B21D(I+1)
663 B21D(I+1) = RWORK(IU2CS+I-1)*B21D(I+1) -
664 $ RWORK(IU2SN+I-1)*B21E(I)
665 B21E(I) = TEMP
666 IF( I .LT. IMAX - 1 ) THEN
667 B21BULGE = RWORK(IU2SN+I-1)*B21E(I+1)
668 B21E(I+1) = RWORK(IU2CS+I-1)*B21E(I+1)
669 END IF
670 TEMP = RWORK(IU1CS+I-1)*B12D(I) + RWORK(IU1SN+I-1)*B12E(I)
671 B12E(I) = RWORK(IU1CS+I-1)*B12E(I) -
672 $ RWORK(IU1SN+I-1)*B12D(I)
673 B12D(I) = TEMP
674 B12BULGE = RWORK(IU1SN+I-1)*B12D(I+1)
675 B12D(I+1) = RWORK(IU1CS+I-1)*B12D(I+1)
676 TEMP = RWORK(IU2CS+I-1)*B22D(I) + RWORK(IU2SN+I-1)*B22E(I)
677 B22E(I) = RWORK(IU2CS+I-1)*B22E(I) -
678 $ RWORK(IU2SN+I-1)*B22D(I)
679 B22D(I) = TEMP
680 B22BULGE = RWORK(IU2SN+I-1)*B22D(I+1)
681 B22D(I+1) = RWORK(IU2CS+I-1)*B22D(I+1)
682 *
683 END DO
684 *
685 * Compute PHI(IMAX-1)
686 *
687 X1 = SIN(THETA(IMAX-1))*B11E(IMAX-1) +
688 $ COS(THETA(IMAX-1))*B21E(IMAX-1)
689 Y1 = SIN(THETA(IMAX-1))*B12D(IMAX-1) +
690 $ COS(THETA(IMAX-1))*B22D(IMAX-1)
691 Y2 = SIN(THETA(IMAX-1))*B12BULGE + COS(THETA(IMAX-1))*B22BULGE
692 *
693 PHI(IMAX-1) = ATAN2( ABS(X1), SQRT(Y1**2+Y2**2) )
694 *
695 * Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX)
696 *
697 RESTART12 = B12D(IMAX-1)**2 + B12BULGE**2 .LE. THRESH**2
698 RESTART22 = B22D(IMAX-1)**2 + B22BULGE**2 .LE. THRESH**2
699 *
700 IF( .NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN
701 CALL DLARTGP( Y2, Y1, RWORK(IV2TSN+IMAX-1-1),
702 $ RWORK(IV2TCS+IMAX-1-1), R )
703 ELSE IF( .NOT. RESTART12 .AND. RESTART22 ) THEN
704 CALL DLARTGP( B12BULGE, B12D(IMAX-1),
705 $ RWORK(IV2TSN+IMAX-1-1),
706 $ RWORK(IV2TCS+IMAX-1-1), R )
707 ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN
708 CALL DLARTGP( B22BULGE, B22D(IMAX-1),
709 $ RWORK(IV2TSN+IMAX-1-1),
710 $ RWORK(IV2TCS+IMAX-1-1), R )
711 ELSE IF( NU .LT. MU ) THEN
712 CALL DLARTGS( B12E(IMAX-1), B12D(IMAX), NU,
713 $ RWORK(IV2TCS+IMAX-1-1),
714 $ RWORK(IV2TSN+IMAX-1-1) )
715 ELSE
716 CALL DLARTGS( B22E(IMAX-1), B22D(IMAX), MU,
717 $ RWORK(IV2TCS+IMAX-1-1),
718 $ RWORK(IV2TSN+IMAX-1-1) )
719 END IF
720 *
721 TEMP = RWORK(IV2TCS+IMAX-1-1)*B12E(IMAX-1) +
722 $ RWORK(IV2TSN+IMAX-1-1)*B12D(IMAX)
723 B12D(IMAX) = RWORK(IV2TCS+IMAX-1-1)*B12D(IMAX) -
724 $ RWORK(IV2TSN+IMAX-1-1)*B12E(IMAX-1)
725 B12E(IMAX-1) = TEMP
726 TEMP = RWORK(IV2TCS+IMAX-1-1)*B22E(IMAX-1) +
727 $ RWORK(IV2TSN+IMAX-1-1)*B22D(IMAX)
728 B22D(IMAX) = RWORK(IV2TCS+IMAX-1-1)*B22D(IMAX) -
729 $ RWORK(IV2TSN+IMAX-1-1)*B22E(IMAX-1)
730 B22E(IMAX-1) = TEMP
731 *
732 * Update singular vectors
733 *
734 IF( WANTU1 ) THEN
735 IF( COLMAJOR ) THEN
736 CALL ZLASR( 'R', 'V', 'F', P, IMAX-IMIN+1,
737 $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1),
738 $ U1(1,IMIN), LDU1 )
739 ELSE
740 CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, P,
741 $ RWORK(IU1CS+IMIN-1), RWORK(IU1SN+IMIN-1),
742 $ U1(IMIN,1), LDU1 )
743 END IF
744 END IF
745 IF( WANTU2 ) THEN
746 IF( COLMAJOR ) THEN
747 CALL ZLASR( 'R', 'V', 'F', M-P, IMAX-IMIN+1,
748 $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1),
749 $ U2(1,IMIN), LDU2 )
750 ELSE
751 CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-P,
752 $ RWORK(IU2CS+IMIN-1), RWORK(IU2SN+IMIN-1),
753 $ U2(IMIN,1), LDU2 )
754 END IF
755 END IF
756 IF( WANTV1T ) THEN
757 IF( COLMAJOR ) THEN
758 CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, Q,
759 $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1),
760 $ V1T(IMIN,1), LDV1T )
761 ELSE
762 CALL ZLASR( 'R', 'V', 'F', Q, IMAX-IMIN+1,
763 $ RWORK(IV1TCS+IMIN-1), RWORK(IV1TSN+IMIN-1),
764 $ V1T(1,IMIN), LDV1T )
765 END IF
766 END IF
767 IF( WANTV2T ) THEN
768 IF( COLMAJOR ) THEN
769 CALL ZLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-Q,
770 $ RWORK(IV2TCS+IMIN-1), RWORK(IV2TSN+IMIN-1),
771 $ V2T(IMIN,1), LDV2T )
772 ELSE
773 CALL ZLASR( 'R', 'V', 'F', M-Q, IMAX-IMIN+1,
774 $ RWORK(IV2TCS+IMIN-1), RWORK(IV2TSN+IMIN-1),
775 $ V2T(1,IMIN), LDV2T )
776 END IF
777 END IF
778 *
779 * Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX)
780 *
781 IF( B11E(IMAX-1)+B21E(IMAX-1) .GT. 0 ) THEN
782 B11D(IMAX) = -B11D(IMAX)
783 B21D(IMAX) = -B21D(IMAX)
784 IF( WANTV1T ) THEN
785 IF( COLMAJOR ) THEN
786 CALL ZSCAL( Q, NEGONECOMPLEX, V1T(IMAX,1), LDV1T )
787 ELSE
788 CALL ZSCAL( Q, NEGONECOMPLEX, V1T(1,IMAX), 1 )
789 END IF
790 END IF
791 END IF
792 *
793 * Compute THETA(IMAX)
794 *
795 X1 = COS(PHI(IMAX-1))*B11D(IMAX) +
796 $ SIN(PHI(IMAX-1))*B12E(IMAX-1)
797 Y1 = COS(PHI(IMAX-1))*B21D(IMAX) +
798 $ SIN(PHI(IMAX-1))*B22E(IMAX-1)
799 *
800 THETA(IMAX) = ATAN2( ABS(Y1), ABS(X1) )
801 *
802 * Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX),
803 * and B22(IMAX,IMAX-1)
804 *
805 IF( B11D(IMAX)+B12E(IMAX-1) .LT. 0 ) THEN
806 B12D(IMAX) = -B12D(IMAX)
807 IF( WANTU1 ) THEN
808 IF( COLMAJOR ) THEN
809 CALL ZSCAL( P, NEGONECOMPLEX, U1(1,IMAX), 1 )
810 ELSE
811 CALL ZSCAL( P, NEGONECOMPLEX, U1(IMAX,1), LDU1 )
812 END IF
813 END IF
814 END IF
815 IF( B21D(IMAX)+B22E(IMAX-1) .GT. 0 ) THEN
816 B22D(IMAX) = -B22D(IMAX)
817 IF( WANTU2 ) THEN
818 IF( COLMAJOR ) THEN
819 CALL ZSCAL( M-P, NEGONECOMPLEX, U2(1,IMAX), 1 )
820 ELSE
821 CALL ZSCAL( M-P, NEGONECOMPLEX, U2(IMAX,1), LDU2 )
822 END IF
823 END IF
824 END IF
825 *
826 * Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX)
827 *
828 IF( B12D(IMAX)+B22D(IMAX) .LT. 0 ) THEN
829 IF( WANTV2T ) THEN
830 IF( COLMAJOR ) THEN
831 CALL ZSCAL( M-Q, NEGONECOMPLEX, V2T(IMAX,1), LDV2T )
832 ELSE
833 CALL ZSCAL( M-Q, NEGONECOMPLEX, V2T(1,IMAX), 1 )
834 END IF
835 END IF
836 END IF
837 *
838 * Test for negligible sines or cosines
839 *
840 DO I = IMIN, IMAX
841 IF( THETA(I) .LT. THRESH ) THEN
842 THETA(I) = ZERO
843 ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN
844 THETA(I) = PIOVER2
845 END IF
846 END DO
847 DO I = IMIN, IMAX-1
848 IF( PHI(I) .LT. THRESH ) THEN
849 PHI(I) = ZERO
850 ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN
851 PHI(I) = PIOVER2
852 END IF
853 END DO
854 *
855 * Deflate
856 *
857 IF (IMAX .GT. 1) THEN
858 DO WHILE( PHI(IMAX-1) .EQ. ZERO )
859 IMAX = IMAX - 1
860 IF (IMAX .LE. 1) EXIT
861 END DO
862 END IF
863 IF( IMIN .GT. IMAX - 1 )
864 $ IMIN = IMAX - 1
865 IF (IMIN .GT. 1) THEN
866 DO WHILE (PHI(IMIN-1) .NE. ZERO)
867 IMIN = IMIN - 1
868 IF (IMIN .LE. 1) EXIT
869 END DO
870 END IF
871 *
872 * Repeat main iteration loop
873 *
874 END DO
875 *
876 * Postprocessing: order THETA from least to greatest
877 *
878 DO I = 1, Q
879 *
880 MINI = I
881 THETAMIN = THETA(I)
882 DO J = I+1, Q
883 IF( THETA(J) .LT. THETAMIN ) THEN
884 MINI = J
885 THETAMIN = THETA(J)
886 END IF
887 END DO
888 *
889 IF( MINI .NE. I ) THEN
890 THETA(MINI) = THETA(I)
891 THETA(I) = THETAMIN
892 IF( COLMAJOR ) THEN
893 IF( WANTU1 )
894 $ CALL ZSWAP( P, U1(1,I), 1, U1(1,MINI), 1 )
895 IF( WANTU2 )
896 $ CALL ZSWAP( M-P, U2(1,I), 1, U2(1,MINI), 1 )
897 IF( WANTV1T )
898 $ CALL ZSWAP( Q, V1T(I,1), LDV1T, V1T(MINI,1), LDV1T )
899 IF( WANTV2T )
900 $ CALL ZSWAP( M-Q, V2T(I,1), LDV2T, V2T(MINI,1),
901 $ LDV2T )
902 ELSE
903 IF( WANTU1 )
904 $ CALL ZSWAP( P, U1(I,1), LDU1, U1(MINI,1), LDU1 )
905 IF( WANTU2 )
906 $ CALL ZSWAP( M-P, U2(I,1), LDU2, U2(MINI,1), LDU2 )
907 IF( WANTV1T )
908 $ CALL ZSWAP( Q, V1T(1,I), 1, V1T(1,MINI), 1 )
909 IF( WANTV2T )
910 $ CALL ZSWAP( M-Q, V2T(1,I), 1, V2T(1,MINI), 1 )
911 END IF
912 END IF
913 *
914 END DO
915 *
916 RETURN
917 *
918 * End of ZBBCSD
919 *
920 END
921