1       RECURSIVE SUBROUTINE ZUNCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS,
  2      $                             SIGNS, M, P, Q, X11, LDX11, X12,
  3      $                             LDX12, X21, LDX21, X22, LDX22, THETA,
  4      $                             U1, LDU1, U2, LDU2, V1T, LDV1T, V2T,
  5      $                             LDV2T, WORK, LWORK, RWORK, LRWORK,
  6      $                             IWORK, INFO )
  7       IMPLICIT NONE
  8 *
  9 *  -- LAPACK routine (version 3.3.1) --
 10 *
 11 *  -- Contributed by Brian Sutton of the Randolph-Macon College --
 12 *  -- November 2010
 13 *
 14 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 15 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--     
 16 *
 17 * @precisions normal z -> c
 18 *
 19 *     .. Scalar Arguments ..
 20       CHARACTER          JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS
 21       INTEGER            INFO, LDU1, LDU2, LDV1T, LDV2T, LDX11, LDX12,
 22      $                   LDX21, LDX22, LRWORK, LWORK, M, P, Q
 23 *     ..
 24 *     .. Array Arguments ..
 25       INTEGER            IWORK( * )
 26       DOUBLE PRECISION   THETA( * )
 27       DOUBLE PRECISION   RWORK( * )
 28       COMPLEX*16         U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),
 29      $                   V2T( LDV2T, * ), WORK( * ), X11( LDX11, * ),
 30      $                   X12( LDX12, * ), X21( LDX21, * ), X22( LDX22,
 31      $                   * )
 32 *     ..
 33 *
 34 *  Purpose
 35 *  =======
 36 *
 37 *  ZUNCSD computes the CS decomposition of an M-by-M partitioned
 38 *  unitary matrix X:
 39 *
 40 *                                  [  I  0  0 |  0  0  0 ]
 41 *                                  [  0  C  0 |  0 -S  0 ]
 42 *      [ X11 | X12 ]   [ U1 |    ] [  0  0  0 |  0  0 -I ] [ V1 |    ]**H
 43 *  X = [-----------] = [---------] [---------------------] [---------]   .
 44 *      [ X21 | X22 ]   [    | U2 ] [  0  0  0 |  I  0  0 ] [    | V2 ]
 45 *                                  [  0  S  0 |  0  C  0 ]
 46 *                                  [  0  0  I |  0  0  0 ]
 47 *
 48 *  X11 is P-by-Q. The unitary matrices U1, U2, V1, and V2 are P-by-P,
 49 *  (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are
 50 *  R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in
 51 *  which R = MIN(P,M-P,Q,M-Q).
 52 *
 53 *  Arguments
 54 *  =========
 55 *
 56 *  JOBU1   (input) CHARACTER
 57 *          = 'Y':      U1 is computed;
 58 *          otherwise:  U1 is not computed.
 59 *
 60 *  JOBU2   (input) CHARACTER
 61 *          = 'Y':      U2 is computed;
 62 *          otherwise:  U2 is not computed.
 63 *
 64 *  JOBV1T  (input) CHARACTER
 65 *          = 'Y':      V1T is computed;
 66 *          otherwise:  V1T is not computed.
 67 *
 68 *  JOBV2T  (input) CHARACTER
 69 *          = 'Y':      V2T is computed;
 70 *          otherwise:  V2T is not computed.
 71 *
 72 *  TRANS   (input) CHARACTER
 73 *          = 'T':      X, U1, U2, V1T, and V2T are stored in row-major
 74 *                      order;
 75 *          otherwise:  X, U1, U2, V1T, and V2T are stored in column-
 76 *                      major order.
 77 *
 78 *  SIGNS   (input) CHARACTER
 79 *          = 'O':      The lower-left block is made nonpositive (the
 80 *                      "other" convention);
 81 *          otherwise:  The upper-right block is made nonpositive (the
 82 *                      "default" convention).
 83 *
 84 *  M       (input) INTEGER
 85 *          The number of rows and columns in X.
 86 *
 87 *  P       (input) INTEGER
 88 *          The number of rows in X11 and X12. 0 <= P <= M.
 89 *
 90 *  Q       (input) INTEGER
 91 *          The number of columns in X11 and X21. 0 <= Q <= M.
 92 *
 93 *  X       (input/workspace) COMPLEX*16 array, dimension (LDX,M)
 94 *          On entry, the unitary matrix whose CSD is desired.
 95 *
 96 *  LDX     (input) INTEGER
 97 *          The leading dimension of X. LDX >= MAX(1,M).
 98 *
 99 *  THETA   (output) DOUBLE PRECISION array, dimension (R), in which R =
100 *          MIN(P,M-P,Q,M-Q).
101 *          C = DIAG( COS(THETA(1)), ... , COS(THETA(R)) ) and
102 *          S = DIAG( SIN(THETA(1)), ... , SIN(THETA(R)) ).
103 *
104 *  U1      (output) COMPLEX*16 array, dimension (P)
105 *          If JOBU1 = 'Y', U1 contains the P-by-P unitary matrix U1.
106 *
107 *  LDU1    (input) INTEGER
108 *          The leading dimension of U1. If JOBU1 = 'Y', LDU1 >=
109 *          MAX(1,P).
110 *
111 *  U2      (output) COMPLEX*16 array, dimension (M-P)
112 *          If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) unitary
113 *          matrix U2.
114 *
115 *  LDU2    (input) INTEGER
116 *          The leading dimension of U2. If JOBU2 = 'Y', LDU2 >=
117 *          MAX(1,M-P).
118 *
119 *  V1T     (output) COMPLEX*16 array, dimension (Q)
120 *          If JOBV1T = 'Y', V1T contains the Q-by-Q matrix unitary
121 *          matrix V1**H.
122 *
123 *  LDV1T   (input) INTEGER
124 *          The leading dimension of V1T. If JOBV1T = 'Y', LDV1T >=
125 *          MAX(1,Q).
126 *
127 *  V2T     (output) COMPLEX*16 array, dimension (M-Q)
128 *          If JOBV2T = 'Y', V2T contains the (M-Q)-by-(M-Q) unitary
129 *          matrix V2**H.
130 *
131 *  LDV2T   (input) INTEGER
132 *          The leading dimension of V2T. If JOBV2T = 'Y', LDV2T >=
133 *          MAX(1,M-Q).
134 *
135 *  WORK    (workspace) COMPLEX*16 array, dimension (MAX(1,LWORK))
136 *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
137 *
138 *  LWORK   (input) INTEGER
139 *          The dimension of the array WORK.
140 *
141 *          If LWORK = -1, then a workspace query is assumed; the routine
142 *          only calculates the optimal size of the WORK array, returns
143 *          this value as the first entry of the work array, and no error
144 *          message related to LWORK is issued by XERBLA.
145 *
146 *  RWORK   (workspace) DOUBLE PRECISION array, dimension MAX(1,LRWORK)
147 *          On exit, if INFO = 0, RWORK(1) returns the optimal LRWORK.
148 *          If INFO > 0 on exit, RWORK(2:R) contains the values PHI(1),
149 *          ..., PHI(R-1) that, together with THETA(1), ..., THETA(R),
150 *          define the matrix in intermediate bidiagonal-block form
151 *          remaining after nonconvergence. INFO specifies the number
152 *          of nonzero PHI's.
153 *
154 *  LRWORK  (input) INTEGER
155 *          The dimension of the array RWORK.
156 *
157 *          If LRWORK = -1, then a workspace query is assumed; the routine
158 *          only calculates the optimal size of the RWORK array, returns
159 *          this value as the first entry of the work array, and no error
160 *          message related to LRWORK is issued by XERBLA.
161 *
162 *  IWORK   (workspace) INTEGER array, dimension (M-MIN(P,M-P,Q,M-Q))
163 *
164 *  INFO    (output) INTEGER
165 *          = 0:  successful exit.
166 *          < 0:  if INFO = -i, the i-th argument had an illegal value.
167 *          > 0:  ZBBCSD did not converge. See the description of RWORK
168 *                above for details.
169 *
170 *  Reference
171 *  =========
172 *
173 *  [1] Brian D. Sutton. Computing the complete CS decomposition. Numer.
174 *      Algorithms, 50(1):33-65, 2009.
175 *
176 *  ===================================================================
177 *
178 *     .. Parameters ..
179       DOUBLE PRECISION   REALONE
180       PARAMETER          ( REALONE = 1.0D0 )
181       COMPLEX*16         NEGONE, ONE, PIOVER2, ZERO
182       PARAMETER          ( NEGONE = (-1.0D0,0.0D0), ONE = (1.0D0,0.0D0),
183      $                     PIOVER2 = 1.57079632679489662D0,
184      $                     ZERO = (0.0D0,0.0D0) )
185 *     ..
186 *     .. Local Scalars ..
187       CHARACTER          TRANST, SIGNST
188       INTEGER            CHILDINFO, I, IB11D, IB11E, IB12D, IB12E,
189      $                   IB21D, IB21E, IB22D, IB22E, IBBCSD, IORBDB,
190      $                   IORGLQ, IORGQR, IPHI, ITAUP1, ITAUP2, ITAUQ1,
191      $                   ITAUQ2, J, LBBCSDWORK, LBBCSDWORKMIN,
192      $                   LBBCSDWORKOPT, LORBDBWORK, LORBDBWORKMIN,
193      $                   LORBDBWORKOPT, LORGLQWORK, LORGLQWORKMIN,
194      $                   LORGLQWORKOPT, LORGQRWORK, LORGQRWORKMIN,
195      $                   LORGQRWORKOPT, LWORKMIN, LWORKOPT
196       LOGICAL            COLMAJOR, DEFAULTSIGNS, LQUERY, WANTU1, WANTU2,
197      $                   WANTV1T, WANTV2T
198       INTEGER            LRWORKMIN, LRWORKOPT
199       LOGICAL            LRQUERY
200 *     ..
201 *     .. External Subroutines ..
202       EXTERNAL           XERBLA, ZBBCSD, ZLACPY, ZLAPMR, ZLAPMT, ZLASCL,
203      $                   ZLASET, ZUNBDB, ZUNGLQ, ZUNGQR
204 *     ..
205 *     .. External Functions ..
206       LOGICAL            LSAME
207       EXTERNAL           LSAME
208 *     ..
209 *     .. Intrinsic Functions
210       INTRINSIC          COSINTMAXMINSIN
211 *     ..
212 *     .. Executable Statements ..
213 *
214 *     Test input arguments
215 *
216       INFO = 0
217       WANTU1 = LSAME( JOBU1, 'Y' )
218       WANTU2 = LSAME( JOBU2, 'Y' )
219       WANTV1T = LSAME( JOBV1T, 'Y' )
220       WANTV2T = LSAME( JOBV2T, 'Y' )
221       COLMAJOR = .NOT. LSAME( TRANS, 'T' )
222       DEFAULTSIGNS = .NOT. LSAME( SIGNS, 'O' )
223       LQUERY = LWORK .EQ. -1
224       LRQUERY = LRWORK .EQ. -1
225       IF( M .LT. 0 ) THEN
226          INFO = -7
227       ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN
228          INFO = -8
229       ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN
230          INFO = -9
231       ELSE IF( ( COLMAJOR .AND. LDX11 .LT. MAX(1,P) ) .OR.
232      $         ( .NOT.COLMAJOR .AND. LDX11 .LT. MAX(1,Q) ) ) THEN
233          INFO = -11
234       ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN
235          INFO = -14
236       ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN
237          INFO = -16
238       ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN
239          INFO = -18
240       ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN
241          INFO = -20
242       END IF
243 *
244 *     Work with transpose if convenient
245 *
246       IF( INFO .EQ. 0 .AND. MIN( P, M-P ) .LT. MIN( Q, M-Q ) ) THEN
247          IF( COLMAJOR ) THEN
248             TRANST = 'T'
249          ELSE
250             TRANST = 'N'
251          END IF
252          IF( DEFAULTSIGNS ) THEN
253             SIGNST = 'O'
254          ELSE
255             SIGNST = 'D'
256          END IF
257          CALL ZUNCSD( JOBV1T, JOBV2T, JOBU1, JOBU2, TRANST, SIGNST, M,
258      $                Q, P, X11, LDX11, X21, LDX21, X12, LDX12, X22,
259      $                LDX22, THETA, V1T, LDV1T, V2T, LDV2T, U1, LDU1,
260      $                U2, LDU2, WORK, LWORK, RWORK, LRWORK, IWORK,
261      $                INFO )
262          RETURN
263       END IF
264 *
265 *     Work with permutation [ 0 I; I 0 ] * X * [ 0 I; I 0 ] if
266 *     convenient
267 *
268       IF( INFO .EQ. 0 .AND. M-.LT. Q ) THEN
269          IF( DEFAULTSIGNS ) THEN
270             SIGNST = 'O'
271          ELSE
272             SIGNST = 'D'
273          END IF
274          CALL ZUNCSD( JOBU2, JOBU1, JOBV2T, JOBV1T, TRANS, SIGNST, M,
275      $                M-P, M-Q, X22, LDX22, X21, LDX21, X12, LDX12, X11,
276      $                LDX11, THETA, U2, LDU2, U1, LDU1, V2T, LDV2T, V1T,
277      $                LDV1T, WORK, LWORK, RWORK, LRWORK, IWORK, INFO )
278          RETURN
279       END IF
280 *
281 *     Compute workspace
282 *
283       IF( INFO .EQ. 0 ) THEN
284 *
285 *        Real workspace
286 *
287          IPHI = 2
288          IB11D = IPHI + MAX1, Q - 1 )
289          IB11E = IB11D + MAX1, Q )
290          IB12D = IB11E + MAX1, Q - 1 )
291          IB12E = IB12D + MAX1, Q )
292          IB21D = IB12E + MAX1, Q - 1 )
293          IB21E = IB21D + MAX1, Q )
294          IB22D = IB21E + MAX1, Q - 1 )
295          IB22E = IB22D + MAX1, Q )
296          IBBCSD = IB22E + MAX1, Q - 1 )
297          CALL ZBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, 0,
298      $                0, U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, LDV2T, 0,
299      $                0000000, RWORK, -1, CHILDINFO )
300          LBBCSDWORKOPT = INT( RWORK(1) )
301          LBBCSDWORKMIN = LBBCSDWORKOPT
302          LRWORKOPT = IBBCSD + LBBCSDWORKOPT - 1
303          LRWORKMIN = IBBCSD + LBBCSDWORKMIN - 1
304          RWORK(1= LRWORKOPT
305 *
306 *        Complex workspace
307 *
308          ITAUP1 = 2
309          ITAUP2 = ITAUP1 + MAX1, P )
310          ITAUQ1 = ITAUP2 + MAX1, M - P )
311          ITAUQ2 = ITAUQ1 + MAX1, Q )
312          IORGQR = ITAUQ2 + MAX1, M - Q )
313          CALL ZUNGQR( M-Q, M-Q, M-Q, 0MAX(1,M-Q), 0, WORK, -1,
314      $                CHILDINFO )
315          LORGQRWORKOPT = INT( WORK(1) )
316          LORGQRWORKMIN = MAX1, M - Q )
317          IORGLQ = ITAUQ2 + MAX1, M - Q )
318          CALL ZUNGLQ( M-Q, M-Q, M-Q, 0MAX(1,M-Q), 0, WORK, -1,
319      $                CHILDINFO )
320          LORGLQWORKOPT = INT( WORK(1) )
321          LORGLQWORKMIN = MAX1, M - Q )
322          IORBDB = ITAUQ2 + MAX1, M - Q )
323          CALL ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12,
324      $                X21, LDX21, X22, LDX22, 000000, WORK,
325      $                -1, CHILDINFO )
326          LORBDBWORKOPT = INT( WORK(1) )
327          LORBDBWORKMIN = LORBDBWORKOPT
328          LWORKOPT = MAX( IORGQR + LORGQRWORKOPT, IORGLQ + LORGLQWORKOPT,
329      $              IORBDB + LORBDBWORKOPT ) - 1
330          LWORKMIN = MAX( IORGQR + LORGQRWORKMIN, IORGLQ + LORGLQWORKMIN,
331      $              IORBDB + LORBDBWORKMIN ) - 1
332          WORK(1= MAX(LWORKOPT,LWORKMIN)
333 *
334          IF( LWORK .LT. LWORKMIN
335      $       .AND. .NOT. ( LQUERY .OR. LRQUERY ) ) THEN
336             INFO = -22
337          ELSE IF( LRWORK .LT. LRWORKMIN
338      $            .AND. .NOT. ( LQUERY .OR. LRQUERY ) ) THEN
339             INFO = -24
340          ELSE
341             LORGQRWORK = LWORK - IORGQR + 1
342             LORGLQWORK = LWORK - IORGLQ + 1
343             LORBDBWORK = LWORK - IORBDB + 1
344             LBBCSDWORK = LRWORK - IBBCSD + 1
345          END IF
346       END IF
347 *
348 *     Abort if any illegal arguments
349 *
350       IF( INFO .NE. 0 ) THEN
351          CALL XERBLA( 'ZUNCSD'-INFO )
352          RETURN
353       ELSE IF( LQUERY .OR. LRQUERY ) THEN
354          RETURN
355       END IF
356 *
357 *     Transform to bidiagonal block form
358 *
359       CALL ZUNBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, X21,
360      $             LDX21, X22, LDX22, THETA, RWORK(IPHI), WORK(ITAUP1),
361      $             WORK(ITAUP2), WORK(ITAUQ1), WORK(ITAUQ2),
362      $             WORK(IORBDB), LORBDBWORK, CHILDINFO )
363 *
364 *     Accumulate Householder reflectors
365 *
366       IF( COLMAJOR ) THEN
367          IF( WANTU1 .AND. P .GT. 0 ) THEN
368             CALL ZLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 )
369             CALL ZUNGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR),
370      $                   LORGQRWORK, INFO)
371          END IF
372          IF( WANTU2 .AND. M-.GT. 0 ) THEN
373             CALL ZLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 )
374             CALL ZUNGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2),
375      $                   WORK(IORGQR), LORGQRWORK, INFO )
376          END IF
377          IF( WANTV1T .AND. Q .GT. 0 ) THEN
378             CALL ZLACPY( 'U', Q-1, Q-1, X11(1,2), LDX11, V1T(2,2),
379      $                   LDV1T )
380             V1T(11= ONE
381             DO J = 2, Q
382                V1T(1,J) = ZERO
383                V1T(J,1= ZERO
384             END DO
385             CALL ZUNGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1),
386      $                   WORK(IORGLQ), LORGLQWORK, INFO )
387          END IF
388          IF( WANTV2T .AND. M-.GT. 0 ) THEN
389             CALL ZLACPY( 'U', P, M-Q, X12, LDX12, V2T, LDV2T )
390             CALL ZLACPY( 'U', M-P-Q, M-P-Q, X22(Q+1,P+1), LDX22,
391      $                   V2T(P+1,P+1), LDV2T )
392             CALL ZUNGLQ( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2),
393      $                   WORK(IORGLQ), LORGLQWORK, INFO )
394          END IF
395       ELSE
396          IF( WANTU1 .AND. P .GT. 0 ) THEN
397             CALL ZLACPY( 'U', Q, P, X11, LDX11, U1, LDU1 )
398             CALL ZUNGLQ( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGLQ),
399      $                   LORGLQWORK, INFO)
400          END IF
401          IF( WANTU2 .AND. M-.GT. 0 ) THEN
402             CALL ZLACPY( 'U', Q, M-P, X21, LDX21, U2, LDU2 )
403             CALL ZUNGLQ( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2),
404      $                   WORK(IORGLQ), LORGLQWORK, INFO )
405          END IF
406          IF( WANTV1T .AND. Q .GT. 0 ) THEN
407             CALL ZLACPY( 'L', Q-1, Q-1, X11(2,1), LDX11, V1T(2,2),
408      $                   LDV1T )
409             V1T(11= ONE
410             DO J = 2, Q
411                V1T(1,J) = ZERO
412                V1T(J,1= ZERO
413             END DO
414             CALL ZUNGQR( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1),
415      $                   WORK(IORGQR), LORGQRWORK, INFO )
416          END IF
417          IF( WANTV2T .AND. M-.GT. 0 ) THEN
418             CALL ZLACPY( 'L', M-Q, P, X12, LDX12, V2T, LDV2T )
419             CALL ZLACPY( 'L', M-P-Q, M-P-Q, X22(P+1,Q+1), LDX22,
420      $                   V2T(P+1,P+1), LDV2T )
421             CALL ZUNGQR( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2),
422      $                   WORK(IORGQR), LORGQRWORK, INFO )
423          END IF
424       END IF
425 *
426 *     Compute the CSD of the matrix in bidiagonal-block form
427 *
428       CALL ZBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, THETA,
429      $             RWORK(IPHI), U1, LDU1, U2, LDU2, V1T, LDV1T, V2T,
430      $             LDV2T, RWORK(IB11D), RWORK(IB11E), RWORK(IB12D),
431      $             RWORK(IB12E), RWORK(IB21D), RWORK(IB21E),
432      $             RWORK(IB22D), RWORK(IB22E), RWORK(IBBCSD),
433      $             LBBCSDWORK, INFO )
434 *
435 *     Permute rows and columns to place identity submatrices in top-
436 *     left corner of (1,1)-block and/or bottom-right corner of (1,2)-
437 *     block and/or bottom-right corner of (2,1)-block and/or top-left
438 *     corner of (2,2)-block 
439 *
440       IF( Q .GT. 0 .AND. WANTU2 ) THEN
441          DO I = 1, Q
442             IWORK(I) = M - P - Q + I
443          END DO
444          DO I = Q + 1, M - P
445             IWORK(I) = I - Q
446          END DO
447          IF( COLMAJOR ) THEN
448             CALL ZLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK )
449          ELSE
450             CALL ZLAPMR( .FALSE., M-P, M-P, U2, LDU2, IWORK )
451          END IF
452       END IF
453       IF( M .GT. 0 .AND. WANTV2T ) THEN
454          DO I = 1, P
455             IWORK(I) = M - P - Q + I
456          END DO
457          DO I = P + 1, M - Q
458             IWORK(I) = I - P
459          END DO
460          IF.NOT. COLMAJOR ) THEN
461             CALL ZLAPMT( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK )
462          ELSE
463             CALL ZLAPMR( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK )
464          END IF
465       END IF
466 *
467       RETURN
468 *
469 *     End ZUNCSD
470 *
471       END
472