1 RECURSIVE SUBROUTINE ZUNCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS,
2 $ SIGNS, M, P, Q, X11, LDX11, X12,
3 $ LDX12, X21, LDX21, X22, LDX22, THETA,
4 $ U1, LDU1, U2, LDU2, V1T, LDV1T, V2T,
5 $ LDV2T, WORK, LWORK, RWORK, LRWORK,
6 $ IWORK, INFO )
7 IMPLICIT NONE
8 *
9 * -- LAPACK routine (version 3.3.1) --
10 *
11 * -- Contributed by Brian Sutton of the Randolph-Macon College --
12 * -- November 2010
13 *
14 * -- LAPACK is a software package provided by Univ. of Tennessee, --
15 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
16 *
17 * @precisions normal z -> c
18 *
19 * .. Scalar Arguments ..
20 CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS
21 INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LDX11, LDX12,
22 $ LDX21, LDX22, LRWORK, LWORK, M, P, Q
23 * ..
24 * .. Array Arguments ..
25 INTEGER IWORK( * )
26 DOUBLE PRECISION THETA( * )
27 DOUBLE PRECISION RWORK( * )
28 COMPLEX*16 U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),
29 $ V2T( LDV2T, * ), WORK( * ), X11( LDX11, * ),
30 $ X12( LDX12, * ), X21( LDX21, * ), X22( LDX22,
31 $ * )
32 * ..
33 *
34 * Purpose
35 * =======
36 *
37 * ZUNCSD computes the CS decomposition of an M-by-M partitioned
38 * unitary matrix X:
39 *
40 * [ I 0 0 | 0 0 0 ]
41 * [ 0 C 0 | 0 -S 0 ]
42 * [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**H
43 * X = [-----------] = [---------] [---------------------] [---------] .
44 * [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ]
45 * [ 0 S 0 | 0 C 0 ]
46 * [ 0 0 I | 0 0 0 ]
47 *
48 * X11 is P-by-Q. The unitary matrices U1, U2, V1, and V2 are P-by-P,
49 * (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are
50 * R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in
51 * which R = MIN(P,M-P,Q,M-Q).
52 *
53 * Arguments
54 * =========
55 *
56 * JOBU1 (input) CHARACTER
57 * = 'Y': U1 is computed;
58 * otherwise: U1 is not computed.
59 *
60 * JOBU2 (input) CHARACTER
61 * = 'Y': U2 is computed;
62 * otherwise: U2 is not computed.
63 *
64 * JOBV1T (input) CHARACTER
65 * = 'Y': V1T is computed;
66 * otherwise: V1T is not computed.
67 *
68 * JOBV2T (input) CHARACTER
69 * = 'Y': V2T is computed;
70 * otherwise: V2T is not computed.
71 *
72 * TRANS (input) CHARACTER
73 * = 'T': X, U1, U2, V1T, and V2T are stored in row-major
74 * order;
75 * otherwise: X, U1, U2, V1T, and V2T are stored in column-
76 * major order.
77 *
78 * SIGNS (input) CHARACTER
79 * = 'O': The lower-left block is made nonpositive (the
80 * "other" convention);
81 * otherwise: The upper-right block is made nonpositive (the
82 * "default" convention).
83 *
84 * M (input) INTEGER
85 * The number of rows and columns in X.
86 *
87 * P (input) INTEGER
88 * The number of rows in X11 and X12. 0 <= P <= M.
89 *
90 * Q (input) INTEGER
91 * The number of columns in X11 and X21. 0 <= Q <= M.
92 *
93 * X (input/workspace) COMPLEX*16 array, dimension (LDX,M)
94 * On entry, the unitary matrix whose CSD is desired.
95 *
96 * LDX (input) INTEGER
97 * The leading dimension of X. LDX >= MAX(1,M).
98 *
99 * THETA (output) DOUBLE PRECISION array, dimension (R), in which R =
100 * MIN(P,M-P,Q,M-Q).
101 * C = DIAG( COS(THETA(1)), ... , COS(THETA(R)) ) and
102 * S = DIAG( SIN(THETA(1)), ... , SIN(THETA(R)) ).
103 *
104 * U1 (output) COMPLEX*16 array, dimension (P)
105 * If JOBU1 = 'Y', U1 contains the P-by-P unitary matrix U1.
106 *
107 * LDU1 (input) INTEGER
108 * The leading dimension of U1. If JOBU1 = 'Y', LDU1 >=
109 * MAX(1,P).
110 *
111 * U2 (output) COMPLEX*16 array, dimension (M-P)
112 * If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) unitary
113 * matrix U2.
114 *
115 * LDU2 (input) INTEGER
116 * The leading dimension of U2. If JOBU2 = 'Y', LDU2 >=
117 * MAX(1,M-P).
118 *
119 * V1T (output) COMPLEX*16 array, dimension (Q)
120 * If JOBV1T = 'Y', V1T contains the Q-by-Q matrix unitary
121 * matrix V1**H.
122 *
123 * LDV1T (input) INTEGER
124 * The leading dimension of V1T. If JOBV1T = 'Y', LDV1T >=
125 * MAX(1,Q).
126 *
127 * V2T (output) COMPLEX*16 array, dimension (M-Q)
128 * If JOBV2T = 'Y', V2T contains the (M-Q)-by-(M-Q) unitary
129 * matrix V2**H.
130 *
131 * LDV2T (input) INTEGER
132 * The leading dimension of V2T. If JOBV2T = 'Y', LDV2T >=
133 * MAX(1,M-Q).
134 *
135 * WORK (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK))
136 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
137 *
138 * LWORK (input) INTEGER
139 * The dimension of the array WORK.
140 *
141 * If LWORK = -1, then a workspace query is assumed; the routine
142 * only calculates the optimal size of the WORK array, returns
143 * this value as the first entry of the work array, and no error
144 * message related to LWORK is issued by XERBLA.
145 *
146 * RWORK (workspace) DOUBLE PRECISION array, dimension MAX(1,LRWORK)
147 * On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
148 * If INFO > 0 on exit, RWORK(2:R) contains the values PHI(1),
149 * ..., PHI(R-1) that, together with THETA(1), ..., THETA(R),
150 * define the matrix in intermediate bidiagonal-block form
151 * remaining after nonconvergence. INFO specifies the number
152 * of nonzero PHI's.
153 *
154 * LRWORK (input) INTEGER
155 * The dimension of the array RWORK.
156 *
157 * If LRWORK = -1, then a workspace query is assumed; the routine
158 * only calculates the optimal size of the RWORK array, returns
159 * this value as the first entry of the work array, and no error
160 * message related to LRWORK is issued by XERBLA.
161 *
162 * IWORK (workspace) INTEGER array, dimension (M-MIN(P,M-P,Q,M-Q))
163 *
164 * INFO (output) INTEGER
165 * = 0: successful exit.
166 * < 0: if INFO = -i, the i-th argument had an illegal value.
167 * > 0: ZBBCSD did not converge. See the description of RWORK
168 * above for details.
169 *
170 * Reference
171 * =========
172 *
173 * [1] Brian D. Sutton. Computing the complete CS decomposition. Numer.
174 * Algorithms, 50(1):33-65, 2009.
175 *
176 * ===================================================================
177 *
178 * .. Parameters ..
179 DOUBLE PRECISION REALONE
180 PARAMETER ( REALONE = 1.0D0 )
181 COMPLEX*16 NEGONE, ONE, PIOVER2, ZERO
182 PARAMETER ( NEGONE = (-1.0D0,0.0D0), ONE = (1.0D0,0.0D0),
183 $ PIOVER2 = 1.57079632679489662D0,
184 $ ZERO = (0.0D0,0.0D0) )
185 * ..
186 * .. Local Scalars ..
187 CHARACTER TRANST, SIGNST
188 INTEGER CHILDINFO, I, IB11D, IB11E, IB12D, IB12E,
189 $ IB21D, IB21E, IB22D, IB22E, IBBCSD, IORBDB,
190 $ IORGLQ, IORGQR, IPHI, ITAUP1, ITAUP2, ITAUQ1,
191 $ ITAUQ2, J, LBBCSDWORK, LBBCSDWORKMIN,
192 $ LBBCSDWORKOPT, LORBDBWORK, LORBDBWORKMIN,
193 $ LORBDBWORKOPT, LORGLQWORK, LORGLQWORKMIN,
194 $ LORGLQWORKOPT, LORGQRWORK, LORGQRWORKMIN,
195 $ LORGQRWORKOPT, LWORKMIN, LWORKOPT
196 LOGICAL COLMAJOR, DEFAULTSIGNS, LQUERY, WANTU1, WANTU2,
197 $ WANTV1T, WANTV2T
198 INTEGER LRWORKMIN, LRWORKOPT
199 LOGICAL LRQUERY
200 * ..
201 * .. External Subroutines ..
202 EXTERNAL XERBLA, ZBBCSD, ZLACPY, ZLAPMR, ZLAPMT, ZLASCL,
203 $ ZLASET, ZUNBDB, ZUNGLQ, ZUNGQR
204 * ..
205 * .. External Functions ..
206 LOGICAL LSAME
207 EXTERNAL LSAME
208 * ..
209 * .. Intrinsic Functions
210 INTRINSIC COS, INT, MAX, MIN, SIN
211 * ..
212 * .. Executable Statements ..
213 *
214 * Test input arguments
215 *
216 INFO = 0
217 WANTU1 = LSAME( JOBU1, 'Y' )
218 WANTU2 = LSAME( JOBU2, 'Y' )
219 WANTV1T = LSAME( JOBV1T, 'Y' )
220 WANTV2T = LSAME( JOBV2T, 'Y' )
221 COLMAJOR = .NOT. LSAME( TRANS, 'T' )
222 DEFAULTSIGNS = .NOT. LSAME( SIGNS, 'O' )
223 LQUERY = LWORK .EQ. -1
224 LRQUERY = LRWORK .EQ. -1
225 IF( M .LT. 0 ) THEN
226 INFO = -7
227 ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN
228 INFO = -8
229 ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN
230 INFO = -9
231 ELSE IF( ( COLMAJOR .AND. LDX11 .LT. MAX(1,P) ) .OR.
232 $ ( .NOT.COLMAJOR .AND. LDX11 .LT. MAX(1,Q) ) ) THEN
233 INFO = -11
234 ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN
235 INFO = -14
236 ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN
237 INFO = -16
238 ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN
239 INFO = -18
240 ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN
241 INFO = -20
242 END IF
243 *
244 * Work with transpose if convenient
245 *
246 IF( INFO .EQ. 0 .AND. MIN( P, M-P ) .LT. MIN( Q, M-Q ) ) THEN
247 IF( COLMAJOR ) THEN
248 TRANST = 'T'
249 ELSE
250 TRANST = 'N'
251 END IF
252 IF( DEFAULTSIGNS ) THEN
253 SIGNST = 'O'
254 ELSE
255 SIGNST = 'D'
256 END IF
257 CALL ZUNCSD( JOBV1T, JOBV2T, JOBU1, JOBU2, TRANST, SIGNST, M,
258 $ Q, P, X11, LDX11, X21, LDX21, X12, LDX12, X22,
259 $ LDX22, THETA, V1T, LDV1T, V2T, LDV2T, U1, LDU1,
260 $ U2, LDU2, WORK, LWORK, RWORK, LRWORK, IWORK,
261 $ INFO )
262 RETURN
263 END IF
264 *
265 * Work with permutation [ 0 I; I 0 ] * X * [ 0 I; I 0 ] if
266 * convenient
267 *
268 IF( INFO .EQ. 0 .AND. M-Q .LT. Q ) THEN
269 IF( DEFAULTSIGNS ) THEN
270 SIGNST = 'O'
271 ELSE
272 SIGNST = 'D'
273 END IF
274 CALL ZUNCSD( JOBU2, JOBU1, JOBV2T, JOBV1T, TRANS, SIGNST, M,
275 $ M-P, M-Q, X22, LDX22, X21, LDX21, X12, LDX12, X11,
276 $ LDX11, THETA, U2, LDU2, U1, LDU1, V2T, LDV2T, V1T,
277 $ LDV1T, WORK, LWORK, RWORK, LRWORK, IWORK, INFO )
278 RETURN
279 END IF
280 *
281 * Compute workspace
282 *
283 IF( INFO .EQ. 0 ) THEN
284 *
285 * Real workspace
286 *
287 IPHI = 2
288 IB11D = IPHI + MAX( 1, Q - 1 )
289 IB11E = IB11D + MAX( 1, Q )
290 IB12D = IB11E + MAX( 1, Q - 1 )
291 IB12E = IB12D + MAX( 1, Q )
292 IB21D = IB12E + MAX( 1, Q - 1 )
293 IB21E = IB21D + MAX( 1, Q )
294 IB22D = IB21E + MAX( 1, Q - 1 )
295 IB22E = IB22D + MAX( 1, Q )
296 IBBCSD = IB22E + MAX( 1, Q - 1 )
297 CALL ZBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, 0,
298 $ 0, U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, LDV2T, 0,
299 $ 0, 0, 0, 0, 0, 0, 0, RWORK, -1, CHILDINFO )
300 LBBCSDWORKOPT = INT( RWORK(1) )
301 LBBCSDWORKMIN = LBBCSDWORKOPT
302 LRWORKOPT = IBBCSD + LBBCSDWORKOPT - 1
303 LRWORKMIN = IBBCSD + LBBCSDWORKMIN - 1
304 RWORK(1) = LRWORKOPT
305 *
306 * Complex workspace
307 *
308 ITAUP1 = 2
309 ITAUP2 = ITAUP1 + MAX( 1, P )
310 ITAUQ1 = ITAUP2 + MAX( 1, M - P )
311 ITAUQ2 = ITAUQ1 + MAX( 1, Q )
312 IORGQR = ITAUQ2 + MAX( 1, M - Q )
313 CALL ZUNGQR( M-Q, M-Q, M-Q, 0, MAX(1,M-Q), 0, WORK, -1,
314 $ CHILDINFO )
315 LORGQRWORKOPT = INT( WORK(1) )
316 LORGQRWORKMIN = MAX( 1, M - Q )
317 IORGLQ = ITAUQ2 + MAX( 1, M - Q )
318 CALL ZUNGLQ( M-Q, M-Q, M-Q, 0, MAX(1,M-Q), 0, WORK, -1,
319 $ CHILDINFO )
320 LORGLQWORKOPT = INT( WORK(1) )
321 LORGLQWORKMIN = MAX( 1, M - Q )
322 IORBDB = ITAUQ2 + MAX( 1, M - Q )
323 CALL ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12,
324 $ X21, LDX21, X22, LDX22, 0, 0, 0, 0, 0, 0, WORK,
325 $ -1, CHILDINFO )
326 LORBDBWORKOPT = INT( WORK(1) )
327 LORBDBWORKMIN = LORBDBWORKOPT
328 LWORKOPT = MAX( IORGQR + LORGQRWORKOPT, IORGLQ + LORGLQWORKOPT,
329 $ IORBDB + LORBDBWORKOPT ) - 1
330 LWORKMIN = MAX( IORGQR + LORGQRWORKMIN, IORGLQ + LORGLQWORKMIN,
331 $ IORBDB + LORBDBWORKMIN ) - 1
332 WORK(1) = MAX(LWORKOPT,LWORKMIN)
333 *
334 IF( LWORK .LT. LWORKMIN
335 $ .AND. .NOT. ( LQUERY .OR. LRQUERY ) ) THEN
336 INFO = -22
337 ELSE IF( LRWORK .LT. LRWORKMIN
338 $ .AND. .NOT. ( LQUERY .OR. LRQUERY ) ) THEN
339 INFO = -24
340 ELSE
341 LORGQRWORK = LWORK - IORGQR + 1
342 LORGLQWORK = LWORK - IORGLQ + 1
343 LORBDBWORK = LWORK - IORBDB + 1
344 LBBCSDWORK = LRWORK - IBBCSD + 1
345 END IF
346 END IF
347 *
348 * Abort if any illegal arguments
349 *
350 IF( INFO .NE. 0 ) THEN
351 CALL XERBLA( 'ZUNCSD', -INFO )
352 RETURN
353 ELSE IF( LQUERY .OR. LRQUERY ) THEN
354 RETURN
355 END IF
356 *
357 * Transform to bidiagonal block form
358 *
359 CALL ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, X21,
360 $ LDX21, X22, LDX22, THETA, RWORK(IPHI), WORK(ITAUP1),
361 $ WORK(ITAUP2), WORK(ITAUQ1), WORK(ITAUQ2),
362 $ WORK(IORBDB), LORBDBWORK, CHILDINFO )
363 *
364 * Accumulate Householder reflectors
365 *
366 IF( COLMAJOR ) THEN
367 IF( WANTU1 .AND. P .GT. 0 ) THEN
368 CALL ZLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 )
369 CALL ZUNGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR),
370 $ LORGQRWORK, INFO)
371 END IF
372 IF( WANTU2 .AND. M-P .GT. 0 ) THEN
373 CALL ZLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 )
374 CALL ZUNGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2),
375 $ WORK(IORGQR), LORGQRWORK, INFO )
376 END IF
377 IF( WANTV1T .AND. Q .GT. 0 ) THEN
378 CALL ZLACPY( 'U', Q-1, Q-1, X11(1,2), LDX11, V1T(2,2),
379 $ LDV1T )
380 V1T(1, 1) = ONE
381 DO J = 2, Q
382 V1T(1,J) = ZERO
383 V1T(J,1) = ZERO
384 END DO
385 CALL ZUNGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1),
386 $ WORK(IORGLQ), LORGLQWORK, INFO )
387 END IF
388 IF( WANTV2T .AND. M-Q .GT. 0 ) THEN
389 CALL ZLACPY( 'U', P, M-Q, X12, LDX12, V2T, LDV2T )
390 CALL ZLACPY( 'U', M-P-Q, M-P-Q, X22(Q+1,P+1), LDX22,
391 $ V2T(P+1,P+1), LDV2T )
392 CALL ZUNGLQ( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2),
393 $ WORK(IORGLQ), LORGLQWORK, INFO )
394 END IF
395 ELSE
396 IF( WANTU1 .AND. P .GT. 0 ) THEN
397 CALL ZLACPY( 'U', Q, P, X11, LDX11, U1, LDU1 )
398 CALL ZUNGLQ( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGLQ),
399 $ LORGLQWORK, INFO)
400 END IF
401 IF( WANTU2 .AND. M-P .GT. 0 ) THEN
402 CALL ZLACPY( 'U', Q, M-P, X21, LDX21, U2, LDU2 )
403 CALL ZUNGLQ( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2),
404 $ WORK(IORGLQ), LORGLQWORK, INFO )
405 END IF
406 IF( WANTV1T .AND. Q .GT. 0 ) THEN
407 CALL ZLACPY( 'L', Q-1, Q-1, X11(2,1), LDX11, V1T(2,2),
408 $ LDV1T )
409 V1T(1, 1) = ONE
410 DO J = 2, Q
411 V1T(1,J) = ZERO
412 V1T(J,1) = ZERO
413 END DO
414 CALL ZUNGQR( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1),
415 $ WORK(IORGQR), LORGQRWORK, INFO )
416 END IF
417 IF( WANTV2T .AND. M-Q .GT. 0 ) THEN
418 CALL ZLACPY( 'L', M-Q, P, X12, LDX12, V2T, LDV2T )
419 CALL ZLACPY( 'L', M-P-Q, M-P-Q, X22(P+1,Q+1), LDX22,
420 $ V2T(P+1,P+1), LDV2T )
421 CALL ZUNGQR( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2),
422 $ WORK(IORGQR), LORGQRWORK, INFO )
423 END IF
424 END IF
425 *
426 * Compute the CSD of the matrix in bidiagonal-block form
427 *
428 CALL ZBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, THETA,
429 $ RWORK(IPHI), U1, LDU1, U2, LDU2, V1T, LDV1T, V2T,
430 $ LDV2T, RWORK(IB11D), RWORK(IB11E), RWORK(IB12D),
431 $ RWORK(IB12E), RWORK(IB21D), RWORK(IB21E),
432 $ RWORK(IB22D), RWORK(IB22E), RWORK(IBBCSD),
433 $ LBBCSDWORK, INFO )
434 *
435 * Permute rows and columns to place identity submatrices in top-
436 * left corner of (1,1)-block and/or bottom-right corner of (1,2)-
437 * block and/or bottom-right corner of (2,1)-block and/or top-left
438 * corner of (2,2)-block
439 *
440 IF( Q .GT. 0 .AND. WANTU2 ) THEN
441 DO I = 1, Q
442 IWORK(I) = M - P - Q + I
443 END DO
444 DO I = Q + 1, M - P
445 IWORK(I) = I - Q
446 END DO
447 IF( COLMAJOR ) THEN
448 CALL ZLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK )
449 ELSE
450 CALL ZLAPMR( .FALSE., M-P, M-P, U2, LDU2, IWORK )
451 END IF
452 END IF
453 IF( M .GT. 0 .AND. WANTV2T ) THEN
454 DO I = 1, P
455 IWORK(I) = M - P - Q + I
456 END DO
457 DO I = P + 1, M - Q
458 IWORK(I) = I - P
459 END DO
460 IF( .NOT. COLMAJOR ) THEN
461 CALL ZLAPMT( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK )
462 ELSE
463 CALL ZLAPMR( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK )
464 END IF
465 END IF
466 *
467 RETURN
468 *
469 * End ZUNCSD
470 *
471 END
472
2 $ SIGNS, M, P, Q, X11, LDX11, X12,
3 $ LDX12, X21, LDX21, X22, LDX22, THETA,
4 $ U1, LDU1, U2, LDU2, V1T, LDV1T, V2T,
5 $ LDV2T, WORK, LWORK, RWORK, LRWORK,
6 $ IWORK, INFO )
7 IMPLICIT NONE
8 *
9 * -- LAPACK routine (version 3.3.1) --
10 *
11 * -- Contributed by Brian Sutton of the Randolph-Macon College --
12 * -- November 2010
13 *
14 * -- LAPACK is a software package provided by Univ. of Tennessee, --
15 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
16 *
17 * @precisions normal z -> c
18 *
19 * .. Scalar Arguments ..
20 CHARACTER JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS
21 INTEGER INFO, LDU1, LDU2, LDV1T, LDV2T, LDX11, LDX12,
22 $ LDX21, LDX22, LRWORK, LWORK, M, P, Q
23 * ..
24 * .. Array Arguments ..
25 INTEGER IWORK( * )
26 DOUBLE PRECISION THETA( * )
27 DOUBLE PRECISION RWORK( * )
28 COMPLEX*16 U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),
29 $ V2T( LDV2T, * ), WORK( * ), X11( LDX11, * ),
30 $ X12( LDX12, * ), X21( LDX21, * ), X22( LDX22,
31 $ * )
32 * ..
33 *
34 * Purpose
35 * =======
36 *
37 * ZUNCSD computes the CS decomposition of an M-by-M partitioned
38 * unitary matrix X:
39 *
40 * [ I 0 0 | 0 0 0 ]
41 * [ 0 C 0 | 0 -S 0 ]
42 * [ X11 | X12 ] [ U1 | ] [ 0 0 0 | 0 0 -I ] [ V1 | ]**H
43 * X = [-----------] = [---------] [---------------------] [---------] .
44 * [ X21 | X22 ] [ | U2 ] [ 0 0 0 | I 0 0 ] [ | V2 ]
45 * [ 0 S 0 | 0 C 0 ]
46 * [ 0 0 I | 0 0 0 ]
47 *
48 * X11 is P-by-Q. The unitary matrices U1, U2, V1, and V2 are P-by-P,
49 * (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are
50 * R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in
51 * which R = MIN(P,M-P,Q,M-Q).
52 *
53 * Arguments
54 * =========
55 *
56 * JOBU1 (input) CHARACTER
57 * = 'Y': U1 is computed;
58 * otherwise: U1 is not computed.
59 *
60 * JOBU2 (input) CHARACTER
61 * = 'Y': U2 is computed;
62 * otherwise: U2 is not computed.
63 *
64 * JOBV1T (input) CHARACTER
65 * = 'Y': V1T is computed;
66 * otherwise: V1T is not computed.
67 *
68 * JOBV2T (input) CHARACTER
69 * = 'Y': V2T is computed;
70 * otherwise: V2T is not computed.
71 *
72 * TRANS (input) CHARACTER
73 * = 'T': X, U1, U2, V1T, and V2T are stored in row-major
74 * order;
75 * otherwise: X, U1, U2, V1T, and V2T are stored in column-
76 * major order.
77 *
78 * SIGNS (input) CHARACTER
79 * = 'O': The lower-left block is made nonpositive (the
80 * "other" convention);
81 * otherwise: The upper-right block is made nonpositive (the
82 * "default" convention).
83 *
84 * M (input) INTEGER
85 * The number of rows and columns in X.
86 *
87 * P (input) INTEGER
88 * The number of rows in X11 and X12. 0 <= P <= M.
89 *
90 * Q (input) INTEGER
91 * The number of columns in X11 and X21. 0 <= Q <= M.
92 *
93 * X (input/workspace) COMPLEX*16 array, dimension (LDX,M)
94 * On entry, the unitary matrix whose CSD is desired.
95 *
96 * LDX (input) INTEGER
97 * The leading dimension of X. LDX >= MAX(1,M).
98 *
99 * THETA (output) DOUBLE PRECISION array, dimension (R), in which R =
100 * MIN(P,M-P,Q,M-Q).
101 * C = DIAG( COS(THETA(1)), ... , COS(THETA(R)) ) and
102 * S = DIAG( SIN(THETA(1)), ... , SIN(THETA(R)) ).
103 *
104 * U1 (output) COMPLEX*16 array, dimension (P)
105 * If JOBU1 = 'Y', U1 contains the P-by-P unitary matrix U1.
106 *
107 * LDU1 (input) INTEGER
108 * The leading dimension of U1. If JOBU1 = 'Y', LDU1 >=
109 * MAX(1,P).
110 *
111 * U2 (output) COMPLEX*16 array, dimension (M-P)
112 * If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) unitary
113 * matrix U2.
114 *
115 * LDU2 (input) INTEGER
116 * The leading dimension of U2. If JOBU2 = 'Y', LDU2 >=
117 * MAX(1,M-P).
118 *
119 * V1T (output) COMPLEX*16 array, dimension (Q)
120 * If JOBV1T = 'Y', V1T contains the Q-by-Q matrix unitary
121 * matrix V1**H.
122 *
123 * LDV1T (input) INTEGER
124 * The leading dimension of V1T. If JOBV1T = 'Y', LDV1T >=
125 * MAX(1,Q).
126 *
127 * V2T (output) COMPLEX*16 array, dimension (M-Q)
128 * If JOBV2T = 'Y', V2T contains the (M-Q)-by-(M-Q) unitary
129 * matrix V2**H.
130 *
131 * LDV2T (input) INTEGER
132 * The leading dimension of V2T. If JOBV2T = 'Y', LDV2T >=
133 * MAX(1,M-Q).
134 *
135 * WORK (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK))
136 * On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
137 *
138 * LWORK (input) INTEGER
139 * The dimension of the array WORK.
140 *
141 * If LWORK = -1, then a workspace query is assumed; the routine
142 * only calculates the optimal size of the WORK array, returns
143 * this value as the first entry of the work array, and no error
144 * message related to LWORK is issued by XERBLA.
145 *
146 * RWORK (workspace) DOUBLE PRECISION array, dimension MAX(1,LRWORK)
147 * On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
148 * If INFO > 0 on exit, RWORK(2:R) contains the values PHI(1),
149 * ..., PHI(R-1) that, together with THETA(1), ..., THETA(R),
150 * define the matrix in intermediate bidiagonal-block form
151 * remaining after nonconvergence. INFO specifies the number
152 * of nonzero PHI's.
153 *
154 * LRWORK (input) INTEGER
155 * The dimension of the array RWORK.
156 *
157 * If LRWORK = -1, then a workspace query is assumed; the routine
158 * only calculates the optimal size of the RWORK array, returns
159 * this value as the first entry of the work array, and no error
160 * message related to LRWORK is issued by XERBLA.
161 *
162 * IWORK (workspace) INTEGER array, dimension (M-MIN(P,M-P,Q,M-Q))
163 *
164 * INFO (output) INTEGER
165 * = 0: successful exit.
166 * < 0: if INFO = -i, the i-th argument had an illegal value.
167 * > 0: ZBBCSD did not converge. See the description of RWORK
168 * above for details.
169 *
170 * Reference
171 * =========
172 *
173 * [1] Brian D. Sutton. Computing the complete CS decomposition. Numer.
174 * Algorithms, 50(1):33-65, 2009.
175 *
176 * ===================================================================
177 *
178 * .. Parameters ..
179 DOUBLE PRECISION REALONE
180 PARAMETER ( REALONE = 1.0D0 )
181 COMPLEX*16 NEGONE, ONE, PIOVER2, ZERO
182 PARAMETER ( NEGONE = (-1.0D0,0.0D0), ONE = (1.0D0,0.0D0),
183 $ PIOVER2 = 1.57079632679489662D0,
184 $ ZERO = (0.0D0,0.0D0) )
185 * ..
186 * .. Local Scalars ..
187 CHARACTER TRANST, SIGNST
188 INTEGER CHILDINFO, I, IB11D, IB11E, IB12D, IB12E,
189 $ IB21D, IB21E, IB22D, IB22E, IBBCSD, IORBDB,
190 $ IORGLQ, IORGQR, IPHI, ITAUP1, ITAUP2, ITAUQ1,
191 $ ITAUQ2, J, LBBCSDWORK, LBBCSDWORKMIN,
192 $ LBBCSDWORKOPT, LORBDBWORK, LORBDBWORKMIN,
193 $ LORBDBWORKOPT, LORGLQWORK, LORGLQWORKMIN,
194 $ LORGLQWORKOPT, LORGQRWORK, LORGQRWORKMIN,
195 $ LORGQRWORKOPT, LWORKMIN, LWORKOPT
196 LOGICAL COLMAJOR, DEFAULTSIGNS, LQUERY, WANTU1, WANTU2,
197 $ WANTV1T, WANTV2T
198 INTEGER LRWORKMIN, LRWORKOPT
199 LOGICAL LRQUERY
200 * ..
201 * .. External Subroutines ..
202 EXTERNAL XERBLA, ZBBCSD, ZLACPY, ZLAPMR, ZLAPMT, ZLASCL,
203 $ ZLASET, ZUNBDB, ZUNGLQ, ZUNGQR
204 * ..
205 * .. External Functions ..
206 LOGICAL LSAME
207 EXTERNAL LSAME
208 * ..
209 * .. Intrinsic Functions
210 INTRINSIC COS, INT, MAX, MIN, SIN
211 * ..
212 * .. Executable Statements ..
213 *
214 * Test input arguments
215 *
216 INFO = 0
217 WANTU1 = LSAME( JOBU1, 'Y' )
218 WANTU2 = LSAME( JOBU2, 'Y' )
219 WANTV1T = LSAME( JOBV1T, 'Y' )
220 WANTV2T = LSAME( JOBV2T, 'Y' )
221 COLMAJOR = .NOT. LSAME( TRANS, 'T' )
222 DEFAULTSIGNS = .NOT. LSAME( SIGNS, 'O' )
223 LQUERY = LWORK .EQ. -1
224 LRQUERY = LRWORK .EQ. -1
225 IF( M .LT. 0 ) THEN
226 INFO = -7
227 ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN
228 INFO = -8
229 ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN
230 INFO = -9
231 ELSE IF( ( COLMAJOR .AND. LDX11 .LT. MAX(1,P) ) .OR.
232 $ ( .NOT.COLMAJOR .AND. LDX11 .LT. MAX(1,Q) ) ) THEN
233 INFO = -11
234 ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN
235 INFO = -14
236 ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN
237 INFO = -16
238 ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN
239 INFO = -18
240 ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN
241 INFO = -20
242 END IF
243 *
244 * Work with transpose if convenient
245 *
246 IF( INFO .EQ. 0 .AND. MIN( P, M-P ) .LT. MIN( Q, M-Q ) ) THEN
247 IF( COLMAJOR ) THEN
248 TRANST = 'T'
249 ELSE
250 TRANST = 'N'
251 END IF
252 IF( DEFAULTSIGNS ) THEN
253 SIGNST = 'O'
254 ELSE
255 SIGNST = 'D'
256 END IF
257 CALL ZUNCSD( JOBV1T, JOBV2T, JOBU1, JOBU2, TRANST, SIGNST, M,
258 $ Q, P, X11, LDX11, X21, LDX21, X12, LDX12, X22,
259 $ LDX22, THETA, V1T, LDV1T, V2T, LDV2T, U1, LDU1,
260 $ U2, LDU2, WORK, LWORK, RWORK, LRWORK, IWORK,
261 $ INFO )
262 RETURN
263 END IF
264 *
265 * Work with permutation [ 0 I; I 0 ] * X * [ 0 I; I 0 ] if
266 * convenient
267 *
268 IF( INFO .EQ. 0 .AND. M-Q .LT. Q ) THEN
269 IF( DEFAULTSIGNS ) THEN
270 SIGNST = 'O'
271 ELSE
272 SIGNST = 'D'
273 END IF
274 CALL ZUNCSD( JOBU2, JOBU1, JOBV2T, JOBV1T, TRANS, SIGNST, M,
275 $ M-P, M-Q, X22, LDX22, X21, LDX21, X12, LDX12, X11,
276 $ LDX11, THETA, U2, LDU2, U1, LDU1, V2T, LDV2T, V1T,
277 $ LDV1T, WORK, LWORK, RWORK, LRWORK, IWORK, INFO )
278 RETURN
279 END IF
280 *
281 * Compute workspace
282 *
283 IF( INFO .EQ. 0 ) THEN
284 *
285 * Real workspace
286 *
287 IPHI = 2
288 IB11D = IPHI + MAX( 1, Q - 1 )
289 IB11E = IB11D + MAX( 1, Q )
290 IB12D = IB11E + MAX( 1, Q - 1 )
291 IB12E = IB12D + MAX( 1, Q )
292 IB21D = IB12E + MAX( 1, Q - 1 )
293 IB21E = IB21D + MAX( 1, Q )
294 IB22D = IB21E + MAX( 1, Q - 1 )
295 IB22E = IB22D + MAX( 1, Q )
296 IBBCSD = IB22E + MAX( 1, Q - 1 )
297 CALL ZBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, 0,
298 $ 0, U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, LDV2T, 0,
299 $ 0, 0, 0, 0, 0, 0, 0, RWORK, -1, CHILDINFO )
300 LBBCSDWORKOPT = INT( RWORK(1) )
301 LBBCSDWORKMIN = LBBCSDWORKOPT
302 LRWORKOPT = IBBCSD + LBBCSDWORKOPT - 1
303 LRWORKMIN = IBBCSD + LBBCSDWORKMIN - 1
304 RWORK(1) = LRWORKOPT
305 *
306 * Complex workspace
307 *
308 ITAUP1 = 2
309 ITAUP2 = ITAUP1 + MAX( 1, P )
310 ITAUQ1 = ITAUP2 + MAX( 1, M - P )
311 ITAUQ2 = ITAUQ1 + MAX( 1, Q )
312 IORGQR = ITAUQ2 + MAX( 1, M - Q )
313 CALL ZUNGQR( M-Q, M-Q, M-Q, 0, MAX(1,M-Q), 0, WORK, -1,
314 $ CHILDINFO )
315 LORGQRWORKOPT = INT( WORK(1) )
316 LORGQRWORKMIN = MAX( 1, M - Q )
317 IORGLQ = ITAUQ2 + MAX( 1, M - Q )
318 CALL ZUNGLQ( M-Q, M-Q, M-Q, 0, MAX(1,M-Q), 0, WORK, -1,
319 $ CHILDINFO )
320 LORGLQWORKOPT = INT( WORK(1) )
321 LORGLQWORKMIN = MAX( 1, M - Q )
322 IORBDB = ITAUQ2 + MAX( 1, M - Q )
323 CALL ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12,
324 $ X21, LDX21, X22, LDX22, 0, 0, 0, 0, 0, 0, WORK,
325 $ -1, CHILDINFO )
326 LORBDBWORKOPT = INT( WORK(1) )
327 LORBDBWORKMIN = LORBDBWORKOPT
328 LWORKOPT = MAX( IORGQR + LORGQRWORKOPT, IORGLQ + LORGLQWORKOPT,
329 $ IORBDB + LORBDBWORKOPT ) - 1
330 LWORKMIN = MAX( IORGQR + LORGQRWORKMIN, IORGLQ + LORGLQWORKMIN,
331 $ IORBDB + LORBDBWORKMIN ) - 1
332 WORK(1) = MAX(LWORKOPT,LWORKMIN)
333 *
334 IF( LWORK .LT. LWORKMIN
335 $ .AND. .NOT. ( LQUERY .OR. LRQUERY ) ) THEN
336 INFO = -22
337 ELSE IF( LRWORK .LT. LRWORKMIN
338 $ .AND. .NOT. ( LQUERY .OR. LRQUERY ) ) THEN
339 INFO = -24
340 ELSE
341 LORGQRWORK = LWORK - IORGQR + 1
342 LORGLQWORK = LWORK - IORGLQ + 1
343 LORBDBWORK = LWORK - IORBDB + 1
344 LBBCSDWORK = LRWORK - IBBCSD + 1
345 END IF
346 END IF
347 *
348 * Abort if any illegal arguments
349 *
350 IF( INFO .NE. 0 ) THEN
351 CALL XERBLA( 'ZUNCSD', -INFO )
352 RETURN
353 ELSE IF( LQUERY .OR. LRQUERY ) THEN
354 RETURN
355 END IF
356 *
357 * Transform to bidiagonal block form
358 *
359 CALL ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, X21,
360 $ LDX21, X22, LDX22, THETA, RWORK(IPHI), WORK(ITAUP1),
361 $ WORK(ITAUP2), WORK(ITAUQ1), WORK(ITAUQ2),
362 $ WORK(IORBDB), LORBDBWORK, CHILDINFO )
363 *
364 * Accumulate Householder reflectors
365 *
366 IF( COLMAJOR ) THEN
367 IF( WANTU1 .AND. P .GT. 0 ) THEN
368 CALL ZLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 )
369 CALL ZUNGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR),
370 $ LORGQRWORK, INFO)
371 END IF
372 IF( WANTU2 .AND. M-P .GT. 0 ) THEN
373 CALL ZLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 )
374 CALL ZUNGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2),
375 $ WORK(IORGQR), LORGQRWORK, INFO )
376 END IF
377 IF( WANTV1T .AND. Q .GT. 0 ) THEN
378 CALL ZLACPY( 'U', Q-1, Q-1, X11(1,2), LDX11, V1T(2,2),
379 $ LDV1T )
380 V1T(1, 1) = ONE
381 DO J = 2, Q
382 V1T(1,J) = ZERO
383 V1T(J,1) = ZERO
384 END DO
385 CALL ZUNGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1),
386 $ WORK(IORGLQ), LORGLQWORK, INFO )
387 END IF
388 IF( WANTV2T .AND. M-Q .GT. 0 ) THEN
389 CALL ZLACPY( 'U', P, M-Q, X12, LDX12, V2T, LDV2T )
390 CALL ZLACPY( 'U', M-P-Q, M-P-Q, X22(Q+1,P+1), LDX22,
391 $ V2T(P+1,P+1), LDV2T )
392 CALL ZUNGLQ( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2),
393 $ WORK(IORGLQ), LORGLQWORK, INFO )
394 END IF
395 ELSE
396 IF( WANTU1 .AND. P .GT. 0 ) THEN
397 CALL ZLACPY( 'U', Q, P, X11, LDX11, U1, LDU1 )
398 CALL ZUNGLQ( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGLQ),
399 $ LORGLQWORK, INFO)
400 END IF
401 IF( WANTU2 .AND. M-P .GT. 0 ) THEN
402 CALL ZLACPY( 'U', Q, M-P, X21, LDX21, U2, LDU2 )
403 CALL ZUNGLQ( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2),
404 $ WORK(IORGLQ), LORGLQWORK, INFO )
405 END IF
406 IF( WANTV1T .AND. Q .GT. 0 ) THEN
407 CALL ZLACPY( 'L', Q-1, Q-1, X11(2,1), LDX11, V1T(2,2),
408 $ LDV1T )
409 V1T(1, 1) = ONE
410 DO J = 2, Q
411 V1T(1,J) = ZERO
412 V1T(J,1) = ZERO
413 END DO
414 CALL ZUNGQR( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1),
415 $ WORK(IORGQR), LORGQRWORK, INFO )
416 END IF
417 IF( WANTV2T .AND. M-Q .GT. 0 ) THEN
418 CALL ZLACPY( 'L', M-Q, P, X12, LDX12, V2T, LDV2T )
419 CALL ZLACPY( 'L', M-P-Q, M-P-Q, X22(P+1,Q+1), LDX22,
420 $ V2T(P+1,P+1), LDV2T )
421 CALL ZUNGQR( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2),
422 $ WORK(IORGQR), LORGQRWORK, INFO )
423 END IF
424 END IF
425 *
426 * Compute the CSD of the matrix in bidiagonal-block form
427 *
428 CALL ZBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, THETA,
429 $ RWORK(IPHI), U1, LDU1, U2, LDU2, V1T, LDV1T, V2T,
430 $ LDV2T, RWORK(IB11D), RWORK(IB11E), RWORK(IB12D),
431 $ RWORK(IB12E), RWORK(IB21D), RWORK(IB21E),
432 $ RWORK(IB22D), RWORK(IB22E), RWORK(IBBCSD),
433 $ LBBCSDWORK, INFO )
434 *
435 * Permute rows and columns to place identity submatrices in top-
436 * left corner of (1,1)-block and/or bottom-right corner of (1,2)-
437 * block and/or bottom-right corner of (2,1)-block and/or top-left
438 * corner of (2,2)-block
439 *
440 IF( Q .GT. 0 .AND. WANTU2 ) THEN
441 DO I = 1, Q
442 IWORK(I) = M - P - Q + I
443 END DO
444 DO I = Q + 1, M - P
445 IWORK(I) = I - Q
446 END DO
447 IF( COLMAJOR ) THEN
448 CALL ZLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK )
449 ELSE
450 CALL ZLAPMR( .FALSE., M-P, M-P, U2, LDU2, IWORK )
451 END IF
452 END IF
453 IF( M .GT. 0 .AND. WANTV2T ) THEN
454 DO I = 1, P
455 IWORK(I) = M - P - Q + I
456 END DO
457 DO I = P + 1, M - Q
458 IWORK(I) = I - P
459 END DO
460 IF( .NOT. COLMAJOR ) THEN
461 CALL ZLAPMT( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK )
462 ELSE
463 CALL ZLAPMR( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK )
464 END IF
465 END IF
466 *
467 RETURN
468 *
469 * End ZUNCSD
470 *
471 END
472