1       SUBROUTINE DTGSJA( JOBU, JOBV, JOBQ, M, P, N, K, L, A, LDA, B,
  2      $                   LDB, TOLA, TOLB, ALPHA, BETA, U, LDU, V, LDV,
  3      $                   Q, LDQ, WORK, NCYCLE, INFO )
  4 *
  5 *  -- LAPACK routine (version 3.3.1)                                  --
  6 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  7 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  8 *  -- April 2009                                                      --
  9 *
 10 *     .. Scalar Arguments ..
 11       CHARACTER          JOBQ, JOBU, JOBV
 12       INTEGER            INFO, K, L, LDA, LDB, LDQ, LDU, LDV, M, N,
 13      $                   NCYCLE, P
 14       DOUBLE PRECISION   TOLA, TOLB
 15 *     ..
 16 *     .. Array Arguments ..
 17       DOUBLE PRECISION   A( LDA, * ), ALPHA( * ), B( LDB, * ),
 18      $                   BETA( * ), Q( LDQ, * ), U( LDU, * ),
 19      $                   V( LDV, * ), WORK( * )
 20 *     ..
 21 *
 22 *  Purpose
 23 *  =======
 24 *
 25 *  DTGSJA computes the generalized singular value decomposition (GSVD)
 26 *  of two real upper triangular (or trapezoidal) matrices A and B.
 27 *
 28 *  On entry, it is assumed that matrices A and B have the following
 29 *  forms, which may be obtained by the preprocessing subroutine DGGSVP
 30 *  from a general M-by-N matrix A and P-by-N matrix B:
 31 *
 32 *               N-K-L  K    L
 33 *     A =    K ( 0    A12  A13 ) if M-K-L >= 0;
 34 *            L ( 0     0   A23 )
 35 *        M-K-L ( 0     0    0  )
 36 *
 37 *             N-K-L  K    L
 38 *     A =  K ( 0    A12  A13 ) if M-K-L < 0;
 39 *        M-K ( 0     0   A23 )
 40 *
 41 *             N-K-L  K    L
 42 *     B =  L ( 0     0   B13 )
 43 *        P-L ( 0     0    0  )
 44 *
 45 *  where the K-by-K matrix A12 and L-by-L matrix B13 are nonsingular
 46 *  upper triangular; A23 is L-by-L upper triangular if M-K-L >= 0,
 47 *  otherwise A23 is (M-K)-by-L upper trapezoidal.
 48 *
 49 *  On exit,
 50 *
 51 *         U**T *A*Q = D1*( 0 R ),    V**T *B*Q = D2*( 0 R ),
 52 *
 53 *  where U, V and Q are orthogonal matrices.
 54 *  R is a nonsingular upper triangular matrix, and D1 and D2 are
 55 *  ``diagonal'' matrices, which are of the following structures:
 56 *
 57 *  If M-K-L >= 0,
 58 *
 59 *                      K  L
 60 *         D1 =     K ( I  0 )
 61 *                  L ( 0  C )
 62 *              M-K-L ( 0  0 )
 63 *
 64 *                    K  L
 65 *         D2 = L   ( 0  S )
 66 *              P-L ( 0  0 )
 67 *
 68 *                 N-K-L  K    L
 69 *    ( 0 R ) = K (  0   R11  R12 ) K
 70 *              L (  0    0   R22 ) L
 71 *
 72 *  where
 73 *
 74 *    C = diag( ALPHA(K+1), ... , ALPHA(K+L) ),
 75 *    S = diag( BETA(K+1),  ... , BETA(K+L) ),
 76 *    C**2 + S**2 = I.
 77 *
 78 *    R is stored in A(1:K+L,N-K-L+1:N) on exit.
 79 *
 80 *  If M-K-L < 0,
 81 *
 82 *                 K M-K K+L-M
 83 *      D1 =   K ( I  0    0   )
 84 *           M-K ( 0  C    0   )
 85 *
 86 *                   K M-K K+L-M
 87 *      D2 =   M-K ( 0  S    0   )
 88 *           K+L-M ( 0  0    I   )
 89 *             P-L ( 0  0    0   )
 90 *
 91 *                 N-K-L  K   M-K  K+L-M
 92 * ( 0 R ) =    K ( 0    R11  R12  R13  )
 93 *            M-K ( 0     0   R22  R23  )
 94 *          K+L-M ( 0     0    0   R33  )
 95 *
 96 *  where
 97 *  C = diag( ALPHA(K+1), ... , ALPHA(M) ),
 98 *  S = diag( BETA(K+1),  ... , BETA(M) ),
 99 *  C**2 + S**2 = I.
100 *
101 *  R = ( R11 R12 R13 ) is stored in A(1:M, N-K-L+1:N) and R33 is stored
102 *      (  0  R22 R23 )
103 *  in B(M-K+1:L,N+M-K-L+1:N) on exit.
104 *
105 *  The computation of the orthogonal transformation matrices U, V or Q
106 *  is optional.  These matrices may either be formed explicitly, or they
107 *  may be postmultiplied into input matrices U1, V1, or Q1.
108 *
109 *  Arguments
110 *  =========
111 *
112 *  JOBU    (input) CHARACTER*1
113 *          = 'U':  U must contain an orthogonal matrix U1 on entry, and
114 *                  the product U1*U is returned;
115 *          = 'I':  U is initialized to the unit matrix, and the
116 *                  orthogonal matrix U is returned;
117 *          = 'N':  U is not computed.
118 *
119 *  JOBV    (input) CHARACTER*1
120 *          = 'V':  V must contain an orthogonal matrix V1 on entry, and
121 *                  the product V1*V is returned;
122 *          = 'I':  V is initialized to the unit matrix, and the
123 *                  orthogonal matrix V is returned;
124 *          = 'N':  V is not computed.
125 *
126 *  JOBQ    (input) CHARACTER*1
127 *          = 'Q':  Q must contain an orthogonal matrix Q1 on entry, and
128 *                  the product Q1*Q is returned;
129 *          = 'I':  Q is initialized to the unit matrix, and the
130 *                  orthogonal matrix Q is returned;
131 *          = 'N':  Q is not computed.
132 *
133 *  M       (input) INTEGER
134 *          The number of rows of the matrix A.  M >= 0.
135 *
136 *  P       (input) INTEGER
137 *          The number of rows of the matrix B.  P >= 0.
138 *
139 *  N       (input) INTEGER
140 *          The number of columns of the matrices A and B.  N >= 0.
141 *
142 *  K       (input) INTEGER
143 *  L       (input) INTEGER
144 *          K and L specify the subblocks in the input matrices A and B:
145 *          A23 = A(K+1:MIN(K+L,M),N-L+1:N) and B13 = B(1:L,N-L+1:N)
146 *          of A and B, whose GSVD is going to be computed by DTGSJA.
147 *          See Further Details.
148 *
149 *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
150 *          On entry, the M-by-N matrix A.
151 *          On exit, A(N-K+1:N,1:MIN(K+L,M) ) contains the triangular
152 *          matrix R or part of R.  See Purpose for details.
153 *
154 *  LDA     (input) INTEGER
155 *          The leading dimension of the array A. LDA >= max(1,M).
156 *
157 *  B       (input/output) DOUBLE PRECISION array, dimension (LDB,N)
158 *          On entry, the P-by-N matrix B.
159 *          On exit, if necessary, B(M-K+1:L,N+M-K-L+1:N) contains
160 *          a part of R.  See Purpose for details.
161 *
162 *  LDB     (input) INTEGER
163 *          The leading dimension of the array B. LDB >= max(1,P).
164 *
165 *  TOLA    (input) DOUBLE PRECISION
166 *  TOLB    (input) DOUBLE PRECISION
167 *          TOLA and TOLB are the convergence criteria for the Jacobi-
168 *          Kogbetliantz iteration procedure. Generally, they are the
169 *          same as used in the preprocessing step, say
170 *              TOLA = max(M,N)*norm(A)*MAZHEPS,
171 *              TOLB = max(P,N)*norm(B)*MAZHEPS.
172 *
173 *  ALPHA   (output) DOUBLE PRECISION array, dimension (N)
174 *  BETA    (output) DOUBLE PRECISION array, dimension (N)
175 *          On exit, ALPHA and BETA contain the generalized singular
176 *          value pairs of A and B;
177 *            ALPHA(1:K) = 1,
178 *            BETA(1:K)  = 0,
179 *          and if M-K-L >= 0,
180 *            ALPHA(K+1:K+L) = diag(C),
181 *            BETA(K+1:K+L)  = diag(S),
182 *          or if M-K-L < 0,
183 *            ALPHA(K+1:M)= C, ALPHA(M+1:K+L)= 0
184 *            BETA(K+1:M) = S, BETA(M+1:K+L) = 1.
185 *          Furthermore, if K+L < N,
186 *            ALPHA(K+L+1:N) = 0 and
187 *            BETA(K+L+1:N)  = 0.
188 *
189 *  U       (input/output) DOUBLE PRECISION array, dimension (LDU,M)
190 *          On entry, if JOBU = 'U', U must contain a matrix U1 (usually
191 *          the orthogonal matrix returned by DGGSVP).
192 *          On exit,
193 *          if JOBU = 'I', U contains the orthogonal matrix U;
194 *          if JOBU = 'U', U contains the product U1*U.
195 *          If JOBU = 'N', U is not referenced.
196 *
197 *  LDU     (input) INTEGER
198 *          The leading dimension of the array U. LDU >= max(1,M) if
199 *          JOBU = 'U'; LDU >= 1 otherwise.
200 *
201 *  V       (input/output) DOUBLE PRECISION array, dimension (LDV,P)
202 *          On entry, if JOBV = 'V', V must contain a matrix V1 (usually
203 *          the orthogonal matrix returned by DGGSVP).
204 *          On exit,
205 *          if JOBV = 'I', V contains the orthogonal matrix V;
206 *          if JOBV = 'V', V contains the product V1*V.
207 *          If JOBV = 'N', V is not referenced.
208 *
209 *  LDV     (input) INTEGER
210 *          The leading dimension of the array V. LDV >= max(1,P) if
211 *          JOBV = 'V'; LDV >= 1 otherwise.
212 *
213 *  Q       (input/output) DOUBLE PRECISION array, dimension (LDQ,N)
214 *          On entry, if JOBQ = 'Q', Q must contain a matrix Q1 (usually
215 *          the orthogonal matrix returned by DGGSVP).
216 *          On exit,
217 *          if JOBQ = 'I', Q contains the orthogonal matrix Q;
218 *          if JOBQ = 'Q', Q contains the product Q1*Q.
219 *          If JOBQ = 'N', Q is not referenced.
220 *
221 *  LDQ     (input) INTEGER
222 *          The leading dimension of the array Q. LDQ >= max(1,N) if
223 *          JOBQ = 'Q'; LDQ >= 1 otherwise.
224 *
225 *  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N)
226 *
227 *  NCYCLE  (output) INTEGER
228 *          The number of cycles required for convergence.
229 *
230 *  INFO    (output) INTEGER
231 *          = 0:  successful exit
232 *          < 0:  if INFO = -i, the i-th argument had an illegal value.
233 *          = 1:  the procedure does not converge after MAXIT cycles.
234 *
235 *  Internal Parameters
236 *  ===================
237 *
238 *  MAXIT   INTEGER
239 *          MAXIT specifies the total loops that the iterative procedure
240 *          may take. If after MAXIT cycles, the routine fails to
241 *          converge, we return INFO = 1.
242 *
243 *  Further Details
244 *  ===============
245 *
246 *  DTGSJA essentially uses a variant of Kogbetliantz algorithm to reduce
247 *  min(L,M-K)-by-L triangular (or trapezoidal) matrix A23 and L-by-L
248 *  matrix B13 to the form:
249 *
250 *           U1**T *A13*Q1 = C1*R1; V1**T *B13*Q1 = S1*R1,
251 *
252 *  where U1, V1 and Q1 are orthogonal matrix, and Z**T is the transpose
253 *  of Z.  C1 and S1 are diagonal matrices satisfying
254 *
255 *                C1**2 + S1**2 = I,
256 *
257 *  and R1 is an L-by-L nonsingular upper triangular matrix.
258 *
259 *  =====================================================================
260 *
261 *     .. Parameters ..
262       INTEGER            MAXIT
263       PARAMETER          ( MAXIT = 40 )
264       DOUBLE PRECISION   ZERO, ONE
265       PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
266 *     ..
267 *     .. Local Scalars ..
268 *
269       LOGICAL            INITQ, INITU, INITV, UPPER, WANTQ, WANTU, WANTV
270       INTEGER            I, J, KCYCLE
271       DOUBLE PRECISION   A1, A2, A3, B1, B2, B3, CSQ, CSU, CSV, ERROR,
272      $                   GAMMA, RWK, SNQ, SNU, SNV, SSMIN
273 *     ..
274 *     .. External Functions ..
275       LOGICAL            LSAME
276       EXTERNAL           LSAME
277 *     ..
278 *     .. External Subroutines ..
279       EXTERNAL           DCOPY, DLAGS2, DLAPLL, DLARTG, DLASET, DROT,
280      $                   DSCAL, XERBLA
281 *     ..
282 *     .. Intrinsic Functions ..
283       INTRINSIC          ABSMAXMIN
284 *     ..
285 *     .. Executable Statements ..
286 *
287 *     Decode and test the input parameters
288 *
289       INITU = LSAME( JOBU, 'I' )
290       WANTU = INITU .OR. LSAME( JOBU, 'U' )
291 *
292       INITV = LSAME( JOBV, 'I' )
293       WANTV = INITV .OR. LSAME( JOBV, 'V' )
294 *
295       INITQ = LSAME( JOBQ, 'I' )
296       WANTQ = INITQ .OR. LSAME( JOBQ, 'Q' )
297 *
298       INFO = 0
299       IF.NOT.( INITU .OR. WANTU .OR. LSAME( JOBU, 'N' ) ) ) THEN
300          INFO = -1
301       ELSE IF.NOT.( INITV .OR. WANTV .OR. LSAME( JOBV, 'N' ) ) ) THEN
302          INFO = -2
303       ELSE IF.NOT.( INITQ .OR. WANTQ .OR. LSAME( JOBQ, 'N' ) ) ) THEN
304          INFO = -3
305       ELSE IF( M.LT.0 ) THEN
306          INFO = -4
307       ELSE IF( P.LT.0 ) THEN
308          INFO = -5
309       ELSE IF( N.LT.0 ) THEN
310          INFO = -6
311       ELSE IF( LDA.LT.MAX1, M ) ) THEN
312          INFO = -10
313       ELSE IF( LDB.LT.MAX1, P ) ) THEN
314          INFO = -12
315       ELSE IF( LDU.LT.1 .OR. ( WANTU .AND. LDU.LT.M ) ) THEN
316          INFO = -18
317       ELSE IF( LDV.LT.1 .OR. ( WANTV .AND. LDV.LT.P ) ) THEN
318          INFO = -20
319       ELSE IF( LDQ.LT.1 .OR. ( WANTQ .AND. LDQ.LT.N ) ) THEN
320          INFO = -22
321       END IF
322       IF( INFO.NE.0 ) THEN
323          CALL XERBLA( 'DTGSJA'-INFO )
324          RETURN
325       END IF
326 *
327 *     Initialize U, V and Q, if necessary
328 *
329       IF( INITU )
330      $   CALL DLASET( 'Full', M, M, ZERO, ONE, U, LDU )
331       IF( INITV )
332      $   CALL DLASET( 'Full', P, P, ZERO, ONE, V, LDV )
333       IF( INITQ )
334      $   CALL DLASET( 'Full', N, N, ZERO, ONE, Q, LDQ )
335 *
336 *     Loop until convergence
337 *
338       UPPER = .FALSE.
339       DO 40 KCYCLE = 1, MAXIT
340 *
341          UPPER = .NOT.UPPER
342 *
343          DO 20 I = 1, L - 1
344             DO 10 J = I + 1, L
345 *
346                A1 = ZERO
347                A2 = ZERO
348                A3 = ZERO
349                IF( K+I.LE.M )
350      $            A1 = A( K+I, N-L+I )
351                IF( K+J.LE.M )
352      $            A3 = A( K+J, N-L+J )
353 *
354                B1 = B( I, N-L+I )
355                B3 = B( J, N-L+J )
356 *
357                IF( UPPER ) THEN
358                   IF( K+I.LE.M )
359      $               A2 = A( K+I, N-L+J )
360                   B2 = B( I, N-L+J )
361                ELSE
362                   IF( K+J.LE.M )
363      $               A2 = A( K+J, N-L+I )
364                   B2 = B( J, N-L+I )
365                END IF
366 *
367                CALL DLAGS2( UPPER, A1, A2, A3, B1, B2, B3, CSU, SNU,
368      $                      CSV, SNV, CSQ, SNQ )
369 *
370 *              Update (K+I)-th and (K+J)-th rows of matrix A: U**T *A
371 *
372                IF( K+J.LE.M )
373      $            CALL DROT( L, A( K+J, N-L+1 ), LDA, A( K+I, N-L+1 ),
374      $                       LDA, CSU, SNU )
375 *
376 *              Update I-th and J-th rows of matrix B: V**T *B
377 *
378                CALL DROT( L, B( J, N-L+1 ), LDB, B( I, N-L+1 ), LDB,
379      $                    CSV, SNV )
380 *
381 *              Update (N-L+I)-th and (N-L+J)-th columns of matrices
382 *              A and B: A*Q and B*Q
383 *
384                CALL DROT( MIN( K+L, M ), A( 1, N-L+J ), 1,
385      $                    A( 1, N-L+I ), 1, CSQ, SNQ )
386 *
387                CALL DROT( L, B( 1, N-L+J ), 1, B( 1, N-L+I ), 1, CSQ,
388      $                    SNQ )
389 *
390                IF( UPPER ) THEN
391                   IF( K+I.LE.M )
392      $               A( K+I, N-L+J ) = ZERO
393                   B( I, N-L+J ) = ZERO
394                ELSE
395                   IF( K+J.LE.M )
396      $               A( K+J, N-L+I ) = ZERO
397                   B( J, N-L+I ) = ZERO
398                END IF
399 *
400 *              Update orthogonal matrices U, V, Q, if desired.
401 *
402                IF( WANTU .AND. K+J.LE.M )
403      $            CALL DROT( M, U( 1, K+J ), 1, U( 1, K+I ), 1, CSU,
404      $                       SNU )
405 *
406                IF( WANTV )
407      $            CALL DROT( P, V( 1, J ), 1, V( 1, I ), 1, CSV, SNV )
408 *
409                IF( WANTQ )
410      $            CALL DROT( N, Q( 1, N-L+J ), 1, Q( 1, N-L+I ), 1, CSQ,
411      $                       SNQ )
412 *
413    10       CONTINUE
414    20    CONTINUE
415 *
416          IF.NOT.UPPER ) THEN
417 *
418 *           The matrices A13 and B13 were lower triangular at the start
419 *           of the cycle, and are now upper triangular.
420 *
421 *           Convergence test: test the parallelism of the corresponding
422 *           rows of A and B.
423 *
424             ERROR = ZERO
425             DO 30 I = 1MIN( L, M-K )
426                CALL DCOPY( L-I+1, A( K+I, N-L+I ), LDA, WORK, 1 )
427                CALL DCOPY( L-I+1, B( I, N-L+I ), LDB, WORK( L+1 ), 1 )
428                CALL DLAPLL( L-I+1, WORK, 1, WORK( L+1 ), 1, SSMIN )
429                ERROR = MAX( ERROR, SSMIN )
430    30       CONTINUE
431 *
432             IFABS( ERROR ).LE.MIN( TOLA, TOLB ) )
433      $         GO TO 50
434          END IF
435 *
436 *        End of cycle loop
437 *
438    40 CONTINUE
439 *
440 *     The algorithm has not converged after MAXIT cycles.
441 *
442       INFO = 1
443       GO TO 100
444 *
445    50 CONTINUE
446 *
447 *     If ERROR <= MIN(TOLA,TOLB), then the algorithm has converged.
448 *     Compute the generalized singular value pairs (ALPHA, BETA), and
449 *     set the triangular matrix R to array A.
450 *
451       DO 60 I = 1, K
452          ALPHA( I ) = ONE
453          BETA( I ) = ZERO
454    60 CONTINUE
455 *
456       DO 70 I = 1MIN( L, M-K )
457 *
458          A1 = A( K+I, N-L+I )
459          B1 = B( I, N-L+I )
460 *
461          IF( A1.NE.ZERO ) THEN
462             GAMMA = B1 / A1
463 *
464 *           change sign if necessary
465 *
466             IFGAMMA.LT.ZERO ) THEN
467                CALL DSCAL( L-I+1-ONE, B( I, N-L+I ), LDB )
468                IF( WANTV )
469      $            CALL DSCAL( P, -ONE, V( 1, I ), 1 )
470             END IF
471 *
472             CALL DLARTG( ABSGAMMA ), ONE, BETA( K+I ), ALPHA( K+I ),
473      $                   RWK )
474 *
475             IF( ALPHA( K+I ).GE.BETA( K+I ) ) THEN
476                CALL DSCAL( L-I+1, ONE / ALPHA( K+I ), A( K+I, N-L+I ),
477      $                     LDA )
478             ELSE
479                CALL DSCAL( L-I+1, ONE / BETA( K+I ), B( I, N-L+I ),
480      $                     LDB )
481                CALL DCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ),
482      $                     LDA )
483             END IF
484 *
485          ELSE
486 *
487             ALPHA( K+I ) = ZERO
488             BETA( K+I ) = ONE
489             CALL DCOPY( L-I+1, B( I, N-L+I ), LDB, A( K+I, N-L+I ),
490      $                  LDA )
491 *
492          END IF
493 *
494    70 CONTINUE
495 *
496 *     Post-assignment
497 *
498       DO 80 I = M + 1, K + L
499          ALPHA( I ) = ZERO
500          BETA( I ) = ONE
501    80 CONTINUE
502 *
503       IF( K+L.LT.N ) THEN
504          DO 90 I = K + L + 1, N
505             ALPHA( I ) = ZERO
506             BETA( I ) = ZERO
507    90    CONTINUE
508       END IF
509 *
510   100 CONTINUE
511       NCYCLE = KCYCLE
512       RETURN
513 *
514 *     End of DTGSJA
515 *
516       END