1       RECURSIVE SUBROUTINE DORCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS,
  2      $                             SIGNS, M, P, Q, X11, LDX11, X12,
  3      $                             LDX12, X21, LDX21, X22, LDX22, THETA,
  4      $                             U1, LDU1, U2, LDU2, V1T, LDV1T, V2T,
  5      $                             LDV2T, WORK, LWORK, IWORK, INFO )
  6       IMPLICIT NONE
  7 *
  8 *  -- LAPACK routine (version 3.3.1) --
  9 *
 10 *  -- Contributed by Brian Sutton of the Randolph-Macon College --
 11 *  -- November 2010
 12 *
 13 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 14 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--     
 15 *
 16 * @precisions normal d -> s
 17 *
 18 *     .. Scalar Arguments ..
 19       CHARACTER          JOBU1, JOBU2, JOBV1T, JOBV2T, SIGNS, TRANS
 20       INTEGER            INFO, LDU1, LDU2, LDV1T, LDV2T, LDX11, LDX12,
 21      $                   LDX21, LDX22, LWORK, M, P, Q
 22 *     ..
 23 *     .. Array Arguments ..
 24       INTEGER            IWORK( * )
 25       DOUBLE PRECISION   THETA( * )
 26       DOUBLE PRECISION   U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),
 27      $                   V2T( LDV2T, * ), WORK( * ), X11( LDX11, * ),
 28      $                   X12( LDX12, * ), X21( LDX21, * ), X22( LDX22,
 29      $                   * )
 30 *     ..
 31 *
 32 *  Purpose
 33 *  =======
 34 *
 35 *  DORCSD computes the CS decomposition of an M-by-M partitioned
 36 *  orthogonal matrix X:
 37 *
 38 *                                  [  I  0  0 |  0  0  0 ]
 39 *                                  [  0  C  0 |  0 -S  0 ]
 40 *      [ X11 | X12 ]   [ U1 |    ] [  0  0  0 |  0  0 -I ] [ V1 |    ]**T
 41 *  X = [-----------] = [---------] [---------------------] [---------]   .
 42 *      [ X21 | X22 ]   [    | U2 ] [  0  0  0 |  I  0  0 ] [    | V2 ]
 43 *                                  [  0  S  0 |  0  C  0 ]
 44 *                                  [  0  0  I |  0  0  0 ]
 45 *
 46 *  X11 is P-by-Q. The orthogonal matrices U1, U2, V1, and V2 are P-by-P,
 47 *  (M-P)-by-(M-P), Q-by-Q, and (M-Q)-by-(M-Q), respectively. C and S are
 48 *  R-by-R nonnegative diagonal matrices satisfying C^2 + S^2 = I, in
 49 *  which R = MIN(P,M-P,Q,M-Q).
 50 *
 51 *  Arguments
 52 *  =========
 53 *
 54 *  JOBU1   (input) CHARACTER
 55 *          = 'Y':      U1 is computed;
 56 *          otherwise:  U1 is not computed.
 57 *
 58 *  JOBU2   (input) CHARACTER
 59 *          = 'Y':      U2 is computed;
 60 *          otherwise:  U2 is not computed.
 61 *
 62 *  JOBV1T  (input) CHARACTER
 63 *          = 'Y':      V1T is computed;
 64 *          otherwise:  V1T is not computed.
 65 *
 66 *  JOBV2T  (input) CHARACTER
 67 *          = 'Y':      V2T is computed;
 68 *          otherwise:  V2T is not computed.
 69 *
 70 *  TRANS   (input) CHARACTER
 71 *          = 'T':      X, U1, U2, V1T, and V2T are stored in row-major
 72 *                      order;
 73 *          otherwise:  X, U1, U2, V1T, and V2T are stored in column-
 74 *                      major order.
 75 *
 76 *  SIGNS   (input) CHARACTER
 77 *          = 'O':      The lower-left block is made nonpositive (the
 78 *                      "other" convention);
 79 *          otherwise:  The upper-right block is made nonpositive (the
 80 *                      "default" convention).
 81 *
 82 *  M       (input) INTEGER
 83 *          The number of rows and columns in X.
 84 *
 85 *  P       (input) INTEGER
 86 *          The number of rows in X11 and X12. 0 <= P <= M.
 87 *
 88 *  Q       (input) INTEGER
 89 *          The number of columns in X11 and X21. 0 <= Q <= M.
 90 *
 91 *  X       (input/workspace) DOUBLE PRECISION array, dimension (LDX,M)
 92 *          On entry, the orthogonal matrix whose CSD is desired.
 93 *
 94 *  LDX     (input) INTEGER
 95 *          The leading dimension of X. LDX >= MAX(1,M).
 96 *
 97 *  THETA   (output) DOUBLE PRECISION array, dimension (R), in which R =
 98 *          MIN(P,M-P,Q,M-Q).
 99 *          C = DIAG( COS(THETA(1)), ... , COS(THETA(R)) ) and
100 *          S = DIAG( SIN(THETA(1)), ... , SIN(THETA(R)) ).
101 *
102 *  U1      (output) DOUBLE PRECISION array, dimension (P)
103 *          If JOBU1 = 'Y', U1 contains the P-by-P orthogonal matrix U1.
104 *
105 *  LDU1    (input) INTEGER
106 *          The leading dimension of U1. If JOBU1 = 'Y', LDU1 >=
107 *          MAX(1,P).
108 *
109 *  U2      (output) DOUBLE PRECISION array, dimension (M-P)
110 *          If JOBU2 = 'Y', U2 contains the (M-P)-by-(M-P) orthogonal
111 *          matrix U2.
112 *
113 *  LDU2    (input) INTEGER
114 *          The leading dimension of U2. If JOBU2 = 'Y', LDU2 >=
115 *          MAX(1,M-P).
116 *
117 *  V1T     (output) DOUBLE PRECISION array, dimension (Q)
118 *          If JOBV1T = 'Y', V1T contains the Q-by-Q matrix orthogonal
119 *          matrix V1**T.
120 *
121 *  LDV1T   (input) INTEGER
122 *          The leading dimension of V1T. If JOBV1T = 'Y', LDV1T >=
123 *          MAX(1,Q).
124 *
125 *  V2T     (output) DOUBLE PRECISION array, dimension (M-Q)
126 *          If JOBV2T = 'Y', V2T contains the (M-Q)-by-(M-Q) orthogonal
127 *          matrix V2**T.
128 *
129 *  LDV2T   (input) INTEGER
130 *          The leading dimension of V2T. If JOBV2T = 'Y', LDV2T >=
131 *          MAX(1,M-Q).
132 *
133 *  WORK    (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
134 *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
135 *          If INFO > 0 on exit, WORK(2:R) contains the values PHI(1),
136 *          ..., PHI(R-1) that, together with THETA(1), ..., THETA(R),
137 *          define the matrix in intermediate bidiagonal-block form
138 *          remaining after nonconvergence. INFO specifies the number
139 *          of nonzero PHI's.
140 *
141 *  LWORK   (input) INTEGER
142 *          The dimension of the array WORK.
143 *
144 *          If LWORK = -1, then a workspace query is assumed; the routine
145 *          only calculates the optimal size of the WORK array, returns
146 *          this value as the first entry of the work array, and no error
147 *          message related to LWORK is issued by XERBLA.
148 *
149 *  IWORK   (workspace) INTEGER array, dimension (M-MIN(P, M-P, Q, M-Q))
150 *
151 *  INFO    (output) INTEGER
152 *          = 0:  successful exit.
153 *          < 0:  if INFO = -i, the i-th argument had an illegal value.
154 *          > 0:  DBBCSD did not converge. See the description of WORK
155 *                above for details.
156 *
157 *  Reference
158 *  =========
159 *
160 *  [1] Brian D. Sutton. Computing the complete CS decomposition. Numer.
161 *      Algorithms, 50(1):33-65, 2009.
162 *
163 *  ===================================================================
164 *
165 *     .. Parameters ..
166       DOUBLE PRECISION   REALONE
167       PARAMETER          ( REALONE = 1.0D0 )
168       DOUBLE PRECISION   NEGONE, ONE, PIOVER2, ZERO
169       PARAMETER          ( NEGONE = -1.0D0, ONE = 1.0D0,
170      $                     PIOVER2 = 1.57079632679489662D0,
171      $                     ZERO = 0.0D0 )
172 *     ..
173 *     .. Local Scalars ..
174       CHARACTER          TRANST, SIGNST
175       INTEGER            CHILDINFO, I, IB11D, IB11E, IB12D, IB12E,
176      $                   IB21D, IB21E, IB22D, IB22E, IBBCSD, IORBDB,
177      $                   IORGLQ, IORGQR, IPHI, ITAUP1, ITAUP2, ITAUQ1,
178      $                   ITAUQ2, J, LBBCSDWORK, LBBCSDWORKMIN,
179      $                   LBBCSDWORKOPT, LORBDBWORK, LORBDBWORKMIN,
180      $                   LORBDBWORKOPT, LORGLQWORK, LORGLQWORKMIN,
181      $                   LORGLQWORKOPT, LORGQRWORK, LORGQRWORKMIN,
182      $                   LORGQRWORKOPT, LWORKMIN, LWORKOPT
183       LOGICAL            COLMAJOR, DEFAULTSIGNS, LQUERY, WANTU1, WANTU2,
184      $                   WANTV1T, WANTV2T
185 *     ..
186 *     .. External Subroutines ..
187       EXTERNAL           DBBCSD, DLACPY, DLAPMR, DLAPMT, DLASCL, DLASET,
188      $                   DORBDB, DORGLQ, DORGQR, XERBLA
189 *     ..
190 *     .. External Functions ..
191       LOGICAL            LSAME
192       EXTERNAL           LSAME
193 *     ..
194 *     .. Intrinsic Functions
195       INTRINSIC          COSINTMAXMINSIN
196 *     ..
197 *     .. Executable Statements ..
198 *
199 *     Test input arguments
200 *
201       INFO = 0
202       WANTU1 = LSAME( JOBU1, 'Y' )
203       WANTU2 = LSAME( JOBU2, 'Y' )
204       WANTV1T = LSAME( JOBV1T, 'Y' )
205       WANTV2T = LSAME( JOBV2T, 'Y' )
206       COLMAJOR = .NOT. LSAME( TRANS, 'T' )
207       DEFAULTSIGNS = .NOT. LSAME( SIGNS, 'O' )
208       LQUERY = LWORK .EQ. -1
209       IF( M .LT. 0 ) THEN
210          INFO = -7
211       ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN
212          INFO = -8
213       ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN
214          INFO = -9
215       ELSE IF( ( COLMAJOR .AND. LDX11 .LT. MAX(1,P) ) .OR.
216      $         ( .NOT.COLMAJOR .AND. LDX11 .LT. MAX(1,Q) ) ) THEN
217          INFO = -11
218       ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN
219          INFO = -14
220       ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN
221          INFO = -16
222       ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN
223          INFO = -18
224       ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN
225          INFO = -20
226       END IF
227 *
228 *     Work with transpose if convenient
229 *
230       IF( INFO .EQ. 0 .AND. MIN( P, M-P ) .LT. MIN( Q, M-Q ) ) THEN
231          IF( COLMAJOR ) THEN
232             TRANST = 'T'
233          ELSE
234             TRANST = 'N'
235          END IF
236          IF( DEFAULTSIGNS ) THEN
237             SIGNST = 'O'
238          ELSE
239             SIGNST = 'D'
240          END IF
241          CALL DORCSD( JOBV1T, JOBV2T, JOBU1, JOBU2, TRANST, SIGNST, M,
242      $                Q, P, X11, LDX11, X21, LDX21, X12, LDX12, X22,
243      $                LDX22, THETA, V1T, LDV1T, V2T, LDV2T, U1, LDU1,
244      $                U2, LDU2, WORK, LWORK, IWORK, INFO )
245          RETURN
246       END IF
247 *
248 *     Work with permutation [ 0 I; I 0 ] * X * [ 0 I; I 0 ] if
249 *     convenient
250 *
251       IF( INFO .EQ. 0 .AND. M-.LT. Q ) THEN
252          IF( DEFAULTSIGNS ) THEN
253             SIGNST = 'O'
254          ELSE
255             SIGNST = 'D'
256          END IF
257          CALL DORCSD( JOBU2, JOBU1, JOBV2T, JOBV1T, TRANS, SIGNST, M,
258      $                M-P, M-Q, X22, LDX22, X21, LDX21, X12, LDX12, X11,
259      $                LDX11, THETA, U2, LDU2, U1, LDU1, V2T, LDV2T, V1T,
260      $                LDV1T, WORK, LWORK, IWORK, INFO )
261          RETURN
262       END IF
263 *
264 *     Compute workspace
265 *
266       IF( INFO .EQ. 0 ) THEN
267 *
268          IPHI = 2
269          ITAUP1 = IPHI + MAX1, Q - 1 )
270          ITAUP2 = ITAUP1 + MAX1, P )
271          ITAUQ1 = ITAUP2 + MAX1, M - P )
272          ITAUQ2 = ITAUQ1 + MAX1, Q )
273          IORGQR = ITAUQ2 + MAX1, M - Q )
274          CALL DORGQR( M-Q, M-Q, M-Q, 0MAX(1,M-Q), 0, WORK, -1,
275      $                CHILDINFO )
276          LORGQRWORKOPT = INT( WORK(1) )
277          LORGQRWORKMIN = MAX1, M - Q )
278          IORGLQ = ITAUQ2 + MAX1, M - Q )
279          CALL DORGLQ( M-Q, M-Q, M-Q, 0MAX(1,M-Q), 0, WORK, -1,
280      $                CHILDINFO )
281          LORGLQWORKOPT = INT( WORK(1) )
282          LORGLQWORKMIN = MAX1, M - Q )
283          IORBDB = ITAUQ2 + MAX1, M - Q )
284          CALL DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12,
285      $                X21, LDX21, X22, LDX22, 000000, WORK,
286      $                -1, CHILDINFO )
287          LORBDBWORKOPT = INT( WORK(1) )
288          LORBDBWORKMIN = LORBDBWORKOPT
289          IB11D = ITAUQ2 + MAX1, M - Q )
290          IB11E = IB11D + MAX1, Q )
291          IB12D = IB11E + MAX1, Q - 1 )
292          IB12E = IB12D + MAX1, Q )
293          IB21D = IB12E + MAX1, Q - 1 )
294          IB21E = IB21D + MAX1, Q )
295          IB22D = IB21E + MAX1, Q - 1 )
296          IB22E = IB22D + MAX1, Q )
297          IBBCSD = IB22E + MAX1, Q - 1 )
298          CALL DBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, 0,
299      $                0, U1, LDU1, U2, LDU2, V1T, LDV1T, V2T, LDV2T, 0,
300      $                0000000, WORK, -1, CHILDINFO )
301          LBBCSDWORKOPT = INT( WORK(1) )
302          LBBCSDWORKMIN = LBBCSDWORKOPT
303          LWORKOPT = MAX( IORGQR + LORGQRWORKOPT, IORGLQ + LORGLQWORKOPT,
304      $              IORBDB + LORBDBWORKOPT, IBBCSD + LBBCSDWORKOPT ) - 1
305          LWORKMIN = MAX( IORGQR + LORGQRWORKMIN, IORGLQ + LORGLQWORKMIN,
306      $              IORBDB + LORBDBWORKOPT, IBBCSD + LBBCSDWORKMIN ) - 1
307          WORK(1= MAX(LWORKOPT,LWORKMIN)
308 *
309          IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN
310             INFO = -22
311          ELSE
312             LORGQRWORK = LWORK - IORGQR + 1
313             LORGLQWORK = LWORK - IORGLQ + 1
314             LORBDBWORK = LWORK - IORBDB + 1
315             LBBCSDWORK = LWORK - IBBCSD + 1
316          END IF
317       END IF
318 *
319 *     Abort if any illegal arguments
320 *
321       IF( INFO .NE. 0 ) THEN
322          CALL XERBLA( 'DORCSD'-INFO )
323          RETURN
324       ELSE IF( LQUERY ) THEN
325          RETURN
326       END IF
327 *
328 *     Transform to bidiagonal block form
329 *
330       CALL DORBDB( TRANS, SIGNS, M, P, Q, X11, LDX11, X12, LDX12, X21,
331      $             LDX21, X22, LDX22, THETA, WORK(IPHI), WORK(ITAUP1),
332      $             WORK(ITAUP2), WORK(ITAUQ1), WORK(ITAUQ2),
333      $             WORK(IORBDB), LORBDBWORK, CHILDINFO )
334 *
335 *     Accumulate Householder reflectors
336 *
337       IF( COLMAJOR ) THEN
338          IF( WANTU1 .AND. P .GT. 0 ) THEN
339             CALL DLACPY( 'L', P, Q, X11, LDX11, U1, LDU1 )
340             CALL DORGQR( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGQR),
341      $                   LORGQRWORK, INFO)
342          END IF
343          IF( WANTU2 .AND. M-.GT. 0 ) THEN
344             CALL DLACPY( 'L', M-P, Q, X21, LDX21, U2, LDU2 )
345             CALL DORGQR( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2),
346      $                   WORK(IORGQR), LORGQRWORK, INFO )
347          END IF
348          IF( WANTV1T .AND. Q .GT. 0 ) THEN
349             CALL DLACPY( 'U', Q-1, Q-1, X11(1,2), LDX11, V1T(2,2),
350      $                   LDV1T )
351             V1T(11= ONE
352             DO J = 2, Q
353                V1T(1,J) = ZERO
354                V1T(J,1= ZERO
355             END DO
356             CALL DORGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1),
357      $                   WORK(IORGLQ), LORGLQWORK, INFO )
358          END IF
359          IF( WANTV2T .AND. M-.GT. 0 ) THEN
360             CALL DLACPY( 'U', P, M-Q, X12, LDX12, V2T, LDV2T )
361             CALL DLACPY( 'U', M-P-Q, M-P-Q, X22(Q+1,P+1), LDX22,
362      $                   V2T(P+1,P+1), LDV2T )
363             CALL DORGLQ( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2),
364      $                   WORK(IORGLQ), LORGLQWORK, INFO )
365          END IF
366       ELSE
367          IF( WANTU1 .AND. P .GT. 0 ) THEN
368             CALL DLACPY( 'U', Q, P, X11, LDX11, U1, LDU1 )
369             CALL DORGLQ( P, P, Q, U1, LDU1, WORK(ITAUP1), WORK(IORGLQ),
370      $                   LORGLQWORK, INFO)
371          END IF
372          IF( WANTU2 .AND. M-.GT. 0 ) THEN
373             CALL DLACPY( 'U', Q, M-P, X21, LDX21, U2, LDU2 )
374             CALL DORGLQ( M-P, M-P, Q, U2, LDU2, WORK(ITAUP2),
375      $                   WORK(IORGLQ), LORGLQWORK, INFO )
376          END IF
377          IF( WANTV1T .AND. Q .GT. 0 ) THEN
378             CALL DLACPY( 'L', Q-1, Q-1, X11(2,1), LDX11, V1T(2,2),
379      $                   LDV1T )
380             V1T(11= ONE
381             DO J = 2, Q
382                V1T(1,J) = ZERO
383                V1T(J,1= ZERO
384             END DO
385             CALL DORGQR( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1),
386      $                   WORK(IORGQR), LORGQRWORK, INFO )
387          END IF
388          IF( WANTV2T .AND. M-.GT. 0 ) THEN
389             CALL DLACPY( 'L', M-Q, P, X12, LDX12, V2T, LDV2T )
390             CALL DLACPY( 'L', M-P-Q, M-P-Q, X22(P+1,Q+1), LDX22,
391      $                   V2T(P+1,P+1), LDV2T )
392             CALL DORGQR( M-Q, M-Q, M-Q, V2T, LDV2T, WORK(ITAUQ2),
393      $                   WORK(IORGQR), LORGQRWORK, INFO )
394          END IF
395       END IF
396 *
397 *     Compute the CSD of the matrix in bidiagonal-block form
398 *
399       CALL DBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q, THETA,
400      $             WORK(IPHI), U1, LDU1, U2, LDU2, V1T, LDV1T, V2T,
401      $             LDV2T, WORK(IB11D), WORK(IB11E), WORK(IB12D),
402      $             WORK(IB12E), WORK(IB21D), WORK(IB21E), WORK(IB22D),
403      $             WORK(IB22E), WORK(IBBCSD), LBBCSDWORK, INFO )
404 *
405 *     Permute rows and columns to place identity submatrices in top-
406 *     left corner of (1,1)-block and/or bottom-right corner of (1,2)-
407 *     block and/or bottom-right corner of (2,1)-block and/or top-left
408 *     corner of (2,2)-block 
409 *
410       IF( Q .GT. 0 .AND. WANTU2 ) THEN
411          DO I = 1, Q
412             IWORK(I) = M - P - Q + I
413          END DO
414          DO I = Q + 1, M - P
415             IWORK(I) = I - Q
416          END DO
417          IF( COLMAJOR ) THEN
418             CALL DLAPMT( .FALSE., M-P, M-P, U2, LDU2, IWORK )
419          ELSE
420             CALL DLAPMR( .FALSE., M-P, M-P, U2, LDU2, IWORK )
421          END IF
422       END IF
423       IF( M .GT. 0 .AND. WANTV2T ) THEN
424          DO I = 1, P
425             IWORK(I) = M - P - Q + I
426          END DO
427          DO I = P + 1, M - Q
428             IWORK(I) = I - P
429          END DO
430          IF.NOT. COLMAJOR ) THEN
431             CALL DLAPMT( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK )
432          ELSE
433             CALL DLAPMR( .FALSE., M-Q, M-Q, V2T, LDV2T, IWORK )
434          END IF
435       END IF
436 *
437       RETURN
438 *
439 *     End DORCSD
440 *
441       END
442