1       SUBROUTINE DBBCSD( JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS, M, P, Q,
  2      $                   THETA, PHI, U1, LDU1, U2, LDU2, V1T, LDV1T,
  3      $                   V2T, LDV2T, B11D, B11E, B12D, B12E, B21D, B21E,
  4      $                   B22D, B22E, WORK, LWORK, INFO )
  5       IMPLICIT NONE
  6 *
  7 *  -- LAPACK routine (version 3.3.0) --
  8 *
  9 *  -- Contributed by Brian Sutton of Randolph-Macon College --
 10 *  -- November 2010
 11 *
 12 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 13 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
 14 *
 15 *     .. Scalar Arguments ..
 16       CHARACTER          JOBU1, JOBU2, JOBV1T, JOBV2T, TRANS
 17       INTEGER            INFO, LDU1, LDU2, LDV1T, LDV2T, LWORK, M, P, Q
 18 *     ..
 19 *     .. Array Arguments ..
 20       DOUBLE PRECISION   B11D( * ), B11E( * ), B12D( * ), B12E( * ),
 21      $                   B21D( * ), B21E( * ), B22D( * ), B22E( * ),
 22      $                   PHI( * ), THETA( * ), WORK( * )
 23       DOUBLE PRECISION   U1( LDU1, * ), U2( LDU2, * ), V1T( LDV1T, * ),
 24      $                   V2T( LDV2T, * )
 25 *     ..
 26 *
 27 *  Purpose
 28 *  =======
 29 *
 30 *  DBBCSD computes the CS decomposition of an orthogonal matrix in
 31 *  bidiagonal-block form,
 32 *
 33 *
 34 *      [ B11 | B12 0  0 ]
 35 *      [  0  |  0 -I  0 ]
 36 *  X = [----------------]
 37 *      [ B21 | B22 0  0 ]
 38 *      [  0  |  0  0  I ]
 39 *
 40 *                                [  C | -S  0  0 ]
 41 *                    [ U1 |    ] [  0 |  0 -I  0 ] [ V1 |    ]**T
 42 *                  = [---------] [---------------] [---------]   .
 43 *                    [    | U2 ] [  S |  C  0  0 ] [    | V2 ]
 44 *                                [  0 |  0  0  I ]
 45 *
 46 *  X is M-by-M, its top-left block is P-by-Q, and Q must be no larger
 47 *  than P, M-P, or M-Q. (If Q is not the smallest index, then X must be
 48 *  transposed and/or permuted. This can be done in constant time using
 49 *  the TRANS and SIGNS options. See DORCSD for details.)
 50 *
 51 *  The bidiagonal matrices B11, B12, B21, and B22 are represented
 52 *  implicitly by angles THETA(1:Q) and PHI(1:Q-1).
 53 *
 54 *  The orthogonal matrices U1, U2, V1T, and V2T are input/output.
 55 *  The input matrices are pre- or post-multiplied by the appropriate
 56 *  singular vector matrices.
 57 *
 58 *  Arguments
 59 *  =========
 60 *
 61 *  JOBU1   (input) CHARACTER
 62 *          = 'Y':      U1 is updated;
 63 *          otherwise:  U1 is not updated.
 64 *
 65 *  JOBU2   (input) CHARACTER
 66 *          = 'Y':      U2 is updated;
 67 *          otherwise:  U2 is not updated.
 68 *
 69 *  JOBV1T  (input) CHARACTER
 70 *          = 'Y':      V1T is updated;
 71 *          otherwise:  V1T is not updated.
 72 *
 73 *  JOBV2T  (input) CHARACTER
 74 *          = 'Y':      V2T is updated;
 75 *          otherwise:  V2T is not updated.
 76 *
 77 *  TRANS   (input) CHARACTER
 78 *          = 'T':      X, U1, U2, V1T, and V2T are stored in row-major
 79 *                      order;
 80 *          otherwise:  X, U1, U2, V1T, and V2T are stored in column-
 81 *                      major order.
 82 *
 83 *  M       (input) INTEGER
 84 *          The number of rows and columns in X, the orthogonal matrix in
 85 *          bidiagonal-block form.
 86 *
 87 *  P       (input) INTEGER
 88 *          The number of rows in the top-left block of X. 0 <= P <= M.
 89 *
 90 *  Q       (input) INTEGER
 91 *          The number of columns in the top-left block of X.
 92 *          0 <= Q <= MIN(P,M-P,M-Q).
 93 *
 94 *  THETA   (input/output) DOUBLE PRECISION array, dimension (Q)
 95 *          On entry, the angles THETA(1),...,THETA(Q) that, along with
 96 *          PHI(1), ...,PHI(Q-1), define the matrix in bidiagonal-block
 97 *          form. On exit, the angles whose cosines and sines define the
 98 *          diagonal blocks in the CS decomposition.
 99 *
100 *  PHI     (input/workspace) DOUBLE PRECISION array, dimension (Q-1)
101 *          The angles PHI(1),...,PHI(Q-1) that, along with THETA(1),...,
102 *          THETA(Q), define the matrix in bidiagonal-block form.
103 *
104 *  U1      (input/output) DOUBLE PRECISION array, dimension (LDU1,P)
105 *          On entry, an LDU1-by-P matrix. On exit, U1 is postmultiplied
106 *          by the left singular vector matrix common to [ B11 ; 0 ] and
107 *          [ B12 0 0 ; 0 -I 0 ].
108 *
109 *  LDU1    (input) INTEGER
110 *          The leading dimension of the array U1.
111 *
112 *  U2      (input/output) DOUBLE PRECISION array, dimension (LDU2,M-P)
113 *          On entry, an LDU2-by-(M-P) matrix. On exit, U2 is
114 *          postmultiplied by the left singular vector matrix common to
115 *          [ B21 ; 0 ] and [ B22 0 0 ; 0 0 I ].
116 *
117 *  LDU2    (input) INTEGER
118 *          The leading dimension of the array U2.
119 *
120 *  V1T     (input/output) DOUBLE PRECISION array, dimension (LDV1T,Q)
121 *          On entry, a LDV1T-by-Q matrix. On exit, V1T is premultiplied
122 *          by the transpose of the right singular vector
123 *          matrix common to [ B11 ; 0 ] and [ B21 ; 0 ].
124 *
125 *  LDV1T   (input) INTEGER
126 *          The leading dimension of the array V1T.
127 *
128 *  V2T     (input/output) DOUBLE PRECISION array, dimension (LDV2T,M-Q)
129 *          On entry, a LDV2T-by-(M-Q) matrix. On exit, V2T is
130 *          premultiplied by the transpose of the right
131 *          singular vector matrix common to [ B12 0 0 ; 0 -I 0 ] and
132 *          [ B22 0 0 ; 0 0 I ].
133 *
134 *  LDV2T   (input) INTEGER
135 *          The leading dimension of the array V2T.
136 *
137 *  B11D    (output) DOUBLE PRECISION array, dimension (Q)
138 *          When DBBCSD converges, B11D contains the cosines of THETA(1),
139 *          ..., THETA(Q). If DBBCSD fails to converge, then B11D
140 *          contains the diagonal of the partially reduced top-left
141 *          block.
142 *
143 *  B11E    (output) DOUBLE PRECISION array, dimension (Q-1)
144 *          When DBBCSD converges, B11E contains zeros. If DBBCSD fails
145 *          to converge, then B11E contains the superdiagonal of the
146 *          partially reduced top-left block.
147 *
148 *  B12D    (output) DOUBLE PRECISION array, dimension (Q)
149 *          When DBBCSD converges, B12D contains the negative sines of
150 *          THETA(1), ..., THETA(Q). If DBBCSD fails to converge, then
151 *          B12D contains the diagonal of the partially reduced top-right
152 *          block.
153 *
154 *  B12E    (output) DOUBLE PRECISION array, dimension (Q-1)
155 *          When DBBCSD converges, B12E contains zeros. If DBBCSD fails
156 *          to converge, then B12E contains the subdiagonal of the
157 *          partially reduced top-right block.
158 *
159 *  WORK    (workspace) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
160 *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
161 *
162 *  LWORK   (input) INTEGER
163 *          The dimension of the array WORK. LWORK >= MAX(1,8*Q).
164 *
165 *          If LWORK = -1, then a workspace query is assumed; the
166 *          routine only calculates the optimal size of the WORK array,
167 *          returns this value as the first entry of the work array, and
168 *          no error message related to LWORK is issued by XERBLA.
169 *
170 *  INFO    (output) INTEGER
171 *          = 0:  successful exit.
172 *          < 0:  if INFO = -i, the i-th argument had an illegal value.
173 *          > 0:  if DBBCSD did not converge, INFO specifies the number
174 *                of nonzero entries in PHI, and B11D, B11E, etc.,
175 *                contain the partially reduced matrix.
176 *
177 *  Reference
178 *  =========
179 *
180 *  [1] Brian D. Sutton. Computing the complete CS decomposition. Numer.
181 *      Algorithms, 50(1):33-65, 2009.
182 *
183 *  Internal Parameters
184 *  ===================
185 *
186 *  TOLMUL  DOUBLE PRECISION, default = MAX(10,MIN(100,EPS**(-1/8)))
187 *          TOLMUL controls the convergence criterion of the QR loop.
188 *          Angles THETA(i), PHI(i) are rounded to 0 or PI/2 when they
189 *          are within TOLMUL*EPS of either bound.
190 *
191 *  ===================================================================
192 *
193 *     .. Parameters ..
194       INTEGER            MAXITR
195       PARAMETER          ( MAXITR = 6 )
196       DOUBLE PRECISION   HUNDRED, MEIGHTH, ONE, PIOVER2, TEN, ZERO
197       PARAMETER          ( HUNDRED = 100.0D0, MEIGHTH = -0.125D0,
198      $                     ONE = 1.0D0, PIOVER2 = 1.57079632679489662D0,
199      $                     TEN = 10.0D0, ZERO = 0.0D0 )
200       DOUBLE PRECISION   NEGONECOMPLEX
201       PARAMETER          ( NEGONECOMPLEX = -1.0D0 )
202 *     ..
203 *     .. Local Scalars ..
204       LOGICAL            COLMAJOR, LQUERY, RESTART11, RESTART12,
205      $                   RESTART21, RESTART22, WANTU1, WANTU2, WANTV1T,
206      $                   WANTV2T
207       INTEGER            I, IMIN, IMAX, ITER, IU1CS, IU1SN, IU2CS,
208      $                   IU2SN, IV1TCS, IV1TSN, IV2TCS, IV2TSN, J,
209      $                   LWORKMIN, LWORKOPT, MAXIT, MINI
210       DOUBLE PRECISION   B11BULGE, B12BULGE, B21BULGE, B22BULGE, DUMMY,
211      $                   EPS, MU, NU, R, SIGMA11, SIGMA21,
212      $                   TEMP, THETAMAX, THETAMIN, THRESH, TOL, TOLMUL,
213      $                   UNFL, X1, X2, Y1, Y2
214 *
215 *     .. External Subroutines ..
216       EXTERNAL           DLASR, DSCAL, DSWAP, DLARTGP, DLARTGS, DLAS2,
217      $                   XERBLA
218 *     ..
219 *     .. External Functions ..
220       DOUBLE PRECISION   DLAMCH
221       LOGICAL            LSAME
222       EXTERNAL           LSAME, DLAMCH
223 *     ..
224 *     .. Intrinsic Functions ..
225       INTRINSIC          ABSATAN2COSMAXMINSINSQRT
226 *     ..
227 *     .. Executable Statements ..
228 *
229 *     Test input arguments
230 *
231       INFO = 0
232       LQUERY = LWORK .EQ. -1
233       WANTU1 = LSAME( JOBU1, 'Y' )
234       WANTU2 = LSAME( JOBU2, 'Y' )
235       WANTV1T = LSAME( JOBV1T, 'Y' )
236       WANTV2T = LSAME( JOBV2T, 'Y' )
237       COLMAJOR = .NOT. LSAME( TRANS, 'T' )
238 *
239       IF( M .LT. 0 ) THEN
240          INFO = -6
241       ELSE IF( P .LT. 0 .OR. P .GT. M ) THEN
242          INFO = -7
243       ELSE IF( Q .LT. 0 .OR. Q .GT. M ) THEN
244          INFO = -8
245       ELSE IF( Q .GT. P .OR. Q .GT. M-.OR. Q .GT. M-Q ) THEN
246          INFO = -8
247       ELSE IF( WANTU1 .AND. LDU1 .LT. P ) THEN
248          INFO = -12
249       ELSE IF( WANTU2 .AND. LDU2 .LT. M-P ) THEN
250          INFO = -14
251       ELSE IF( WANTV1T .AND. LDV1T .LT. Q ) THEN
252          INFO = -16
253       ELSE IF( WANTV2T .AND. LDV2T .LT. M-Q ) THEN
254          INFO = -18
255       END IF
256 *
257 *     Quick return if Q = 0
258 *
259       IF( INFO .EQ. 0 .AND. Q .EQ. 0 ) THEN
260          LWORKMIN = 1
261          WORK(1= LWORKMIN
262          RETURN
263       END IF
264 *
265 *     Compute workspace
266 *
267       IF( INFO .EQ. 0 ) THEN
268          IU1CS = 1
269          IU1SN = IU1CS + Q
270          IU2CS = IU1SN + Q
271          IU2SN = IU2CS + Q
272          IV1TCS = IU2SN + Q
273          IV1TSN = IV1TCS + Q
274          IV2TCS = IV1TSN + Q
275          IV2TSN = IV2TCS + Q
276          LWORKOPT = IV2TSN + Q - 1
277          LWORKMIN = LWORKOPT
278          WORK(1= LWORKOPT
279          IF( LWORK .LT. LWORKMIN .AND. .NOT. LQUERY ) THEN
280             INFO = -28
281          END IF
282       END IF
283 *
284       IF( INFO .NE. 0 ) THEN
285          CALL XERBLA( 'DBBCSD'-INFO )
286          RETURN
287       ELSE IF( LQUERY ) THEN
288          RETURN
289       END IF
290 *
291 *     Get machine constants
292 *
293       EPS = DLAMCH( 'Epsilon' )
294       UNFL = DLAMCH( 'Safe minimum' )
295       TOLMUL = MAX( TEN, MIN( HUNDRED, EPS**MEIGHTH ) )
296       TOL = TOLMUL*EPS
297       THRESH = MAX( TOL, MAXITR*Q*Q*UNFL )
298 *
299 *     Test for negligible sines or cosines
300 *
301       DO I = 1, Q
302          IF( THETA(I) .LT. THRESH ) THEN
303             THETA(I) = ZERO
304          ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN
305             THETA(I) = PIOVER2
306          END IF
307       END DO
308       DO I = 1, Q-1
309          IF( PHI(I) .LT. THRESH ) THEN
310             PHI(I) = ZERO
311          ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN
312             PHI(I) = PIOVER2
313          END IF
314       END DO
315 *
316 *     Initial deflation
317 *
318       IMAX = Q
319       DO WHILE( ( IMAX .GT. 1 ) .AND. ( PHI(IMAX-1.EQ. ZERO ) )
320          IMAX = IMAX - 1
321       END DO
322       IMIN = IMAX - 1
323       IF  ( IMIN .GT. 1 ) THEN
324          DO WHILE( PHI(IMIN-1.NE. ZERO )
325             IMIN = IMIN - 1
326             IF  ( IMIN .LE. 1 ) EXIT
327          END DO
328       END IF
329 *
330 *     Initialize iteration counter
331 *
332       MAXIT = MAXITR*Q*Q
333       ITER = 0
334 *
335 *     Begin main iteration loop
336 *
337       DO WHILE( IMAX .GT. 1 )
338 *
339 *        Compute the matrix entries
340 *
341          B11D(IMIN) = COS( THETA(IMIN) )
342          B21D(IMIN) = -SIN( THETA(IMIN) )
343          DO I = IMIN, IMAX - 1
344             B11E(I) = -SIN( THETA(I) ) * SIN( PHI(I) )
345             B11D(I+1= COS( THETA(I+1) ) * COS( PHI(I) )
346             B12D(I) = SIN( THETA(I) ) * COS( PHI(I) )
347             B12E(I) = COS( THETA(I+1) ) * SIN( PHI(I) )
348             B21E(I) = -COS( THETA(I) ) * SIN( PHI(I) )
349             B21D(I+1= -SIN( THETA(I+1) ) * COS( PHI(I) )
350             B22D(I) = COS( THETA(I) ) * COS( PHI(I) )
351             B22E(I) = -SIN( THETA(I+1) ) * SIN( PHI(I) )
352          END DO
353          B12D(IMAX) = SIN( THETA(IMAX) )
354          B22D(IMAX) = COS( THETA(IMAX) )
355 *
356 *        Abort if not converging; otherwise, increment ITER
357 *
358          IF( ITER .GT. MAXIT ) THEN
359             INFO = 0
360             DO I = 1, Q
361                IF( PHI(I) .NE. ZERO )
362      $            INFO = INFO + 1
363             END DO
364             RETURN
365          END IF
366 *
367          ITER = ITER + IMAX - IMIN
368 *
369 *        Compute shifts
370 *
371          THETAMAX = THETA(IMIN)
372          THETAMIN = THETA(IMIN)
373          DO I = IMIN+1, IMAX
374             IF( THETA(I) > THETAMAX )
375      $         THETAMAX = THETA(I)
376             IF( THETA(I) < THETAMIN )
377      $         THETAMIN = THETA(I)
378          END DO
379 *
380          IF( THETAMAX .GT. PIOVER2 - THRESH ) THEN
381 *
382 *           Zero on diagonals of B11 and B22; induce deflation with a
383 *           zero shift
384 *
385             MU = ZERO
386             NU = ONE
387 *
388          ELSE IF( THETAMIN .LT. THRESH ) THEN
389 *
390 *           Zero on diagonals of B12 and B21; induce deflation with a
391 *           zero shift
392 *
393             MU = ONE
394             NU = ZERO
395 *
396          ELSE
397 *
398 *           Compute shifts for B11 and B21 and use the lesser
399 *
400             CALL DLAS2( B11D(IMAX-1), B11E(IMAX-1), B11D(IMAX), SIGMA11,
401      $                  DUMMY )
402             CALL DLAS2( B21D(IMAX-1), B21E(IMAX-1), B21D(IMAX), SIGMA21,
403      $                  DUMMY )
404 *
405             IF( SIGMA11 .LE. SIGMA21 ) THEN
406                MU = SIGMA11
407                NU = SQRT( ONE - MU**2 )
408                IF( MU .LT. THRESH ) THEN
409                   MU = ZERO
410                   NU = ONE
411                END IF
412             ELSE
413                NU = SIGMA21
414                MU = SQRT1.0 - NU**2 )
415                IF( NU .LT. THRESH ) THEN
416                   MU = ONE
417                   NU = ZERO
418                END IF
419             END IF
420          END IF
421 *
422 *        Rotate to produce bulges in B11 and B21
423 *
424          IF( MU .LE. NU ) THEN
425             CALL DLARTGS( B11D(IMIN), B11E(IMIN), MU,
426      $                    WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1) )
427          ELSE
428             CALL DLARTGS( B21D(IMIN), B21E(IMIN), NU,
429      $                    WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1) )
430          END IF
431 *
432          TEMP = WORK(IV1TCS+IMIN-1)*B11D(IMIN) +
433      $          WORK(IV1TSN+IMIN-1)*B11E(IMIN)
434          B11E(IMIN) = WORK(IV1TCS+IMIN-1)*B11E(IMIN) -
435      $                WORK(IV1TSN+IMIN-1)*B11D(IMIN)
436          B11D(IMIN) = TEMP
437          B11BULGE = WORK(IV1TSN+IMIN-1)*B11D(IMIN+1)
438          B11D(IMIN+1= WORK(IV1TCS+IMIN-1)*B11D(IMIN+1)
439          TEMP = WORK(IV1TCS+IMIN-1)*B21D(IMIN) +
440      $          WORK(IV1TSN+IMIN-1)*B21E(IMIN)
441          B21E(IMIN) = WORK(IV1TCS+IMIN-1)*B21E(IMIN) -
442      $                WORK(IV1TSN+IMIN-1)*B21D(IMIN)
443          B21D(IMIN) = TEMP
444          B21BULGE = WORK(IV1TSN+IMIN-1)*B21D(IMIN+1)
445          B21D(IMIN+1= WORK(IV1TCS+IMIN-1)*B21D(IMIN+1)
446 *
447 *        Compute THETA(IMIN)
448 *
449          THETA( IMIN ) = ATAN2SQRT( B21D(IMIN)**2+B21BULGE**2 ),
450      $                   SQRT( B11D(IMIN)**2+B11BULGE**2 ) )
451 *
452 *        Chase the bulges in B11(IMIN+1,IMIN) and B21(IMIN+1,IMIN)
453 *
454          IF( B11D(IMIN)**2+B11BULGE**2 .GT. THRESH**2 ) THEN
455             CALL DLARTGP( B11BULGE, B11D(IMIN), WORK(IU1SN+IMIN-1),
456      $                    WORK(IU1CS+IMIN-1), R )
457          ELSE IF( MU .LE. NU ) THEN
458             CALL DLARTGS( B11E( IMIN ), B11D( IMIN + 1 ), MU,
459      $                    WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1) )
460          ELSE
461             CALL DLARTGS( B12D( IMIN ), B12E( IMIN ), NU,
462      $                    WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1) )
463          END IF
464          IF( B21D(IMIN)**2+B21BULGE**2 .GT. THRESH**2 ) THEN
465             CALL DLARTGP( B21BULGE, B21D(IMIN), WORK(IU2SN+IMIN-1),
466      $                    WORK(IU2CS+IMIN-1), R )
467          ELSE IF( NU .LT. MU ) THEN
468             CALL DLARTGS( B21E( IMIN ), B21D( IMIN + 1 ), NU,
469      $                    WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1) )
470          ELSE
471             CALL DLARTGS( B22D(IMIN), B22E(IMIN), MU,
472      $                    WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1) )
473          END IF
474          WORK(IU2CS+IMIN-1= -WORK(IU2CS+IMIN-1)
475          WORK(IU2SN+IMIN-1= -WORK(IU2SN+IMIN-1)
476 *
477          TEMP = WORK(IU1CS+IMIN-1)*B11E(IMIN) +
478      $          WORK(IU1SN+IMIN-1)*B11D(IMIN+1)
479          B11D(IMIN+1= WORK(IU1CS+IMIN-1)*B11D(IMIN+1-
480      $                  WORK(IU1SN+IMIN-1)*B11E(IMIN)
481          B11E(IMIN) = TEMP
482          IF( IMAX .GT. IMIN+1 ) THEN
483             B11BULGE = WORK(IU1SN+IMIN-1)*B11E(IMIN+1)
484             B11E(IMIN+1= WORK(IU1CS+IMIN-1)*B11E(IMIN+1)
485          END IF
486          TEMP = WORK(IU1CS+IMIN-1)*B12D(IMIN) +
487      $          WORK(IU1SN+IMIN-1)*B12E(IMIN)
488          B12E(IMIN) = WORK(IU1CS+IMIN-1)*B12E(IMIN) -
489      $                WORK(IU1SN+IMIN-1)*B12D(IMIN)
490          B12D(IMIN) = TEMP
491          B12BULGE = WORK(IU1SN+IMIN-1)*B12D(IMIN+1)
492          B12D(IMIN+1= WORK(IU1CS+IMIN-1)*B12D(IMIN+1)
493          TEMP = WORK(IU2CS+IMIN-1)*B21E(IMIN) +
494      $          WORK(IU2SN+IMIN-1)*B21D(IMIN+1)
495          B21D(IMIN+1= WORK(IU2CS+IMIN-1)*B21D(IMIN+1-
496      $                  WORK(IU2SN+IMIN-1)*B21E(IMIN)
497          B21E(IMIN) = TEMP
498          IF( IMAX .GT. IMIN+1 ) THEN
499             B21BULGE = WORK(IU2SN+IMIN-1)*B21E(IMIN+1)
500             B21E(IMIN+1= WORK(IU2CS+IMIN-1)*B21E(IMIN+1)
501          END IF
502          TEMP = WORK(IU2CS+IMIN-1)*B22D(IMIN) +
503      $          WORK(IU2SN+IMIN-1)*B22E(IMIN)
504          B22E(IMIN) = WORK(IU2CS+IMIN-1)*B22E(IMIN) -
505      $                WORK(IU2SN+IMIN-1)*B22D(IMIN)
506          B22D(IMIN) = TEMP
507          B22BULGE = WORK(IU2SN+IMIN-1)*B22D(IMIN+1)
508          B22D(IMIN+1= WORK(IU2CS+IMIN-1)*B22D(IMIN+1)
509 *
510 *        Inner loop: chase bulges from B11(IMIN,IMIN+2),
511 *        B12(IMIN,IMIN+1), B21(IMIN,IMIN+2), and B22(IMIN,IMIN+1) to
512 *        bottom-right
513 *
514          DO I = IMIN+1, IMAX-1
515 *
516 *           Compute PHI(I-1)
517 *
518             X1 = SIN(THETA(I-1))*B11E(I-1+ COS(THETA(I-1))*B21E(I-1)
519             X2 = SIN(THETA(I-1))*B11BULGE + COS(THETA(I-1))*B21BULGE
520             Y1 = SIN(THETA(I-1))*B12D(I-1+ COS(THETA(I-1))*B22D(I-1)
521             Y2 = SIN(THETA(I-1))*B12BULGE + COS(THETA(I-1))*B22BULGE
522 *
523             PHI(I-1= ATAN2SQRT(X1**2+X2**2), SQRT(Y1**2+Y2**2) )
524 *
525 *           Determine if there are bulges to chase or if a new direct
526 *           summand has been reached
527 *
528             RESTART11 = B11E(I-1)**2 + B11BULGE**2 .LE. THRESH**2
529             RESTART21 = B21E(I-1)**2 + B21BULGE**2 .LE. THRESH**2
530             RESTART12 = B12D(I-1)**2 + B12BULGE**2 .LE. THRESH**2
531             RESTART22 = B22D(I-1)**2 + B22BULGE**2 .LE. THRESH**2
532 *
533 *           If possible, chase bulges from B11(I-1,I+1), B12(I-1,I),
534 *           B21(I-1,I+1), and B22(I-1,I). If necessary, restart bulge-
535 *           chasing by applying the original shift again.
536 *
537             IF.NOT. RESTART11 .AND. .NOT. RESTART21 ) THEN
538                CALL DLARTGP( X2, X1, WORK(IV1TSN+I-1), WORK(IV1TCS+I-1),
539      $                       R )
540             ELSE IF.NOT. RESTART11 .AND. RESTART21 ) THEN
541                CALL DLARTGP( B11BULGE, B11E(I-1), WORK(IV1TSN+I-1),
542      $                       WORK(IV1TCS+I-1), R )
543             ELSE IF( RESTART11 .AND. .NOT. RESTART21 ) THEN
544                CALL DLARTGP( B21BULGE, B21E(I-1), WORK(IV1TSN+I-1),
545      $                       WORK(IV1TCS+I-1), R )
546             ELSE IF( MU .LE. NU ) THEN
547                CALL DLARTGS( B11D(I), B11E(I), MU, WORK(IV1TCS+I-1),
548      $                       WORK(IV1TSN+I-1) )
549             ELSE
550                CALL DLARTGS( B21D(I), B21E(I), NU, WORK(IV1TCS+I-1),
551      $                       WORK(IV1TSN+I-1) )
552             END IF
553             WORK(IV1TCS+I-1= -WORK(IV1TCS+I-1)
554             WORK(IV1TSN+I-1= -WORK(IV1TSN+I-1)
555             IF.NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN
556                CALL DLARTGP( Y2, Y1, WORK(IV2TSN+I-1-1),
557      $                       WORK(IV2TCS+I-1-1), R )
558             ELSE IF.NOT. RESTART12 .AND. RESTART22 ) THEN
559                CALL DLARTGP( B12BULGE, B12D(I-1), WORK(IV2TSN+I-1-1),
560      $                       WORK(IV2TCS+I-1-1), R )
561             ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN
562                CALL DLARTGP( B22BULGE, B22D(I-1), WORK(IV2TSN+I-1-1),
563      $                       WORK(IV2TCS+I-1-1), R )
564             ELSE IF( NU .LT. MU ) THEN
565                CALL DLARTGS( B12E(I-1), B12D(I), NU, WORK(IV2TCS+I-1-1),
566      $                       WORK(IV2TSN+I-1-1) )
567             ELSE
568                CALL DLARTGS( B22E(I-1), B22D(I), MU, WORK(IV2TCS+I-1-1),
569      $                       WORK(IV2TSN+I-1-1) )
570             END IF
571 *
572             TEMP = WORK(IV1TCS+I-1)*B11D(I) + WORK(IV1TSN+I-1)*B11E(I)
573             B11E(I) = WORK(IV1TCS+I-1)*B11E(I) -
574      $                WORK(IV1TSN+I-1)*B11D(I)
575             B11D(I) = TEMP
576             B11BULGE = WORK(IV1TSN+I-1)*B11D(I+1)
577             B11D(I+1= WORK(IV1TCS+I-1)*B11D(I+1)
578             TEMP = WORK(IV1TCS+I-1)*B21D(I) + WORK(IV1TSN+I-1)*B21E(I)
579             B21E(I) = WORK(IV1TCS+I-1)*B21E(I) -
580      $                WORK(IV1TSN+I-1)*B21D(I)
581             B21D(I) = TEMP
582             B21BULGE = WORK(IV1TSN+I-1)*B21D(I+1)
583             B21D(I+1= WORK(IV1TCS+I-1)*B21D(I+1)
584             TEMP = WORK(IV2TCS+I-1-1)*B12E(I-1+
585      $             WORK(IV2TSN+I-1-1)*B12D(I)
586             B12D(I) = WORK(IV2TCS+I-1-1)*B12D(I) -
587      $                WORK(IV2TSN+I-1-1)*B12E(I-1)
588             B12E(I-1= TEMP
589             B12BULGE = WORK(IV2TSN+I-1-1)*B12E(I)
590             B12E(I) = WORK(IV2TCS+I-1-1)*B12E(I)
591             TEMP = WORK(IV2TCS+I-1-1)*B22E(I-1+
592      $             WORK(IV2TSN+I-1-1)*B22D(I)
593             B22D(I) = WORK(IV2TCS+I-1-1)*B22D(I) -
594      $                WORK(IV2TSN+I-1-1)*B22E(I-1)
595             B22E(I-1= TEMP
596             B22BULGE = WORK(IV2TSN+I-1-1)*B22E(I)
597             B22E(I) = WORK(IV2TCS+I-1-1)*B22E(I)
598 *
599 *           Compute THETA(I)
600 *
601             X1 = COS(PHI(I-1))*B11D(I) + SIN(PHI(I-1))*B12E(I-1)
602             X2 = COS(PHI(I-1))*B11BULGE + SIN(PHI(I-1))*B12BULGE
603             Y1 = COS(PHI(I-1))*B21D(I) + SIN(PHI(I-1))*B22E(I-1)
604             Y2 = COS(PHI(I-1))*B21BULGE + SIN(PHI(I-1))*B22BULGE
605 *
606             THETA(I) = ATAN2SQRT(Y1**2+Y2**2), SQRT(X1**2+X2**2) )
607 *
608 *           Determine if there are bulges to chase or if a new direct
609 *           summand has been reached
610 *
611             RESTART11 =   B11D(I)**2 + B11BULGE**2 .LE. THRESH**2
612             RESTART12 = B12E(I-1)**2 + B12BULGE**2 .LE. THRESH**2
613             RESTART21 =   B21D(I)**2 + B21BULGE**2 .LE. THRESH**2
614             RESTART22 = B22E(I-1)**2 + B22BULGE**2 .LE. THRESH**2
615 *
616 *           If possible, chase bulges from B11(I+1,I), B12(I+1,I-1),
617 *           B21(I+1,I), and B22(I+1,I-1). If necessary, restart bulge-
618 *           chasing by applying the original shift again.
619 *
620             IF.NOT. RESTART11 .AND. .NOT. RESTART12 ) THEN
621                CALL DLARTGP( X2, X1, WORK(IU1SN+I-1), WORK(IU1CS+I-1),
622      $                       R )
623             ELSE IF.NOT. RESTART11 .AND. RESTART12 ) THEN
624                CALL DLARTGP( B11BULGE, B11D(I), WORK(IU1SN+I-1),
625      $                       WORK(IU1CS+I-1), R )
626             ELSE IF( RESTART11 .AND. .NOT. RESTART12 ) THEN
627                CALL DLARTGP( B12BULGE, B12E(I-1), WORK(IU1SN+I-1),
628      $                       WORK(IU1CS+I-1), R )
629             ELSE IF( MU .LE. NU ) THEN
630                CALL DLARTGS( B11E(I), B11D(I+1), MU, WORK(IU1CS+I-1),
631      $                       WORK(IU1SN+I-1) )
632             ELSE
633                CALL DLARTGS( B12D(I), B12E(I), NU, WORK(IU1CS+I-1),
634      $                       WORK(IU1SN+I-1) )
635             END IF
636             IF.NOT. RESTART21 .AND. .NOT. RESTART22 ) THEN
637                CALL DLARTGP( Y2, Y1, WORK(IU2SN+I-1), WORK(IU2CS+I-1),
638      $                       R )
639             ELSE IF.NOT. RESTART21 .AND. RESTART22 ) THEN
640                CALL DLARTGP( B21BULGE, B21D(I), WORK(IU2SN+I-1),
641      $                       WORK(IU2CS+I-1), R )
642             ELSE IF( RESTART21 .AND. .NOT. RESTART22 ) THEN
643                CALL DLARTGP( B22BULGE, B22E(I-1), WORK(IU2SN+I-1),
644      $                       WORK(IU2CS+I-1), R )
645             ELSE IF( NU .LT. MU ) THEN
646                CALL DLARTGS( B21E(I), B21E(I+1), NU, WORK(IU2CS+I-1),
647      $                       WORK(IU2SN+I-1) )
648             ELSE
649                CALL DLARTGS( B22D(I), B22E(I), MU, WORK(IU2CS+I-1),
650      $                       WORK(IU2SN+I-1) )
651             END IF
652             WORK(IU2CS+I-1= -WORK(IU2CS+I-1)
653             WORK(IU2SN+I-1= -WORK(IU2SN+I-1)
654 *
655             TEMP = WORK(IU1CS+I-1)*B11E(I) + WORK(IU1SN+I-1)*B11D(I+1)
656             B11D(I+1= WORK(IU1CS+I-1)*B11D(I+1-
657      $                  WORK(IU1SN+I-1)*B11E(I)
658             B11E(I) = TEMP
659             IF( I .LT. IMAX - 1 ) THEN
660                B11BULGE = WORK(IU1SN+I-1)*B11E(I+1)
661                B11E(I+1= WORK(IU1CS+I-1)*B11E(I+1)
662             END IF
663             TEMP = WORK(IU2CS+I-1)*B21E(I) + WORK(IU2SN+I-1)*B21D(I+1)
664             B21D(I+1= WORK(IU2CS+I-1)*B21D(I+1-
665      $                  WORK(IU2SN+I-1)*B21E(I)
666             B21E(I) = TEMP
667             IF( I .LT. IMAX - 1 ) THEN
668                B21BULGE = WORK(IU2SN+I-1)*B21E(I+1)
669                B21E(I+1= WORK(IU2CS+I-1)*B21E(I+1)
670             END IF
671             TEMP = WORK(IU1CS+I-1)*B12D(I) + WORK(IU1SN+I-1)*B12E(I)
672             B12E(I) = WORK(IU1CS+I-1)*B12E(I) - WORK(IU1SN+I-1)*B12D(I)
673             B12D(I) = TEMP
674             B12BULGE = WORK(IU1SN+I-1)*B12D(I+1)
675             B12D(I+1= WORK(IU1CS+I-1)*B12D(I+1)
676             TEMP = WORK(IU2CS+I-1)*B22D(I) + WORK(IU2SN+I-1)*B22E(I)
677             B22E(I) = WORK(IU2CS+I-1)*B22E(I) - WORK(IU2SN+I-1)*B22D(I)
678             B22D(I) = TEMP
679             B22BULGE = WORK(IU2SN+I-1)*B22D(I+1)
680             B22D(I+1= WORK(IU2CS+I-1)*B22D(I+1)
681 *
682          END DO
683 *
684 *        Compute PHI(IMAX-1)
685 *
686          X1 = SIN(THETA(IMAX-1))*B11E(IMAX-1+
687      $        COS(THETA(IMAX-1))*B21E(IMAX-1)
688          Y1 = SIN(THETA(IMAX-1))*B12D(IMAX-1+
689      $        COS(THETA(IMAX-1))*B22D(IMAX-1)
690          Y2 = SIN(THETA(IMAX-1))*B12BULGE + COS(THETA(IMAX-1))*B22BULGE
691 *
692          PHI(IMAX-1= ATAN2ABS(X1), SQRT(Y1**2+Y2**2) )
693 *
694 *        Chase bulges from B12(IMAX-1,IMAX) and B22(IMAX-1,IMAX)
695 *
696          RESTART12 = B12D(IMAX-1)**2 + B12BULGE**2 .LE. THRESH**2
697          RESTART22 = B22D(IMAX-1)**2 + B22BULGE**2 .LE. THRESH**2
698 *
699          IF.NOT. RESTART12 .AND. .NOT. RESTART22 ) THEN
700             CALL DLARTGP( Y2, Y1, WORK(IV2TSN+IMAX-1-1),
701      $                    WORK(IV2TCS+IMAX-1-1), R )
702          ELSE IF.NOT. RESTART12 .AND. RESTART22 ) THEN
703             CALL DLARTGP( B12BULGE, B12D(IMAX-1), WORK(IV2TSN+IMAX-1-1),
704      $                    WORK(IV2TCS+IMAX-1-1), R )
705          ELSE IF( RESTART12 .AND. .NOT. RESTART22 ) THEN
706             CALL DLARTGP( B22BULGE, B22D(IMAX-1), WORK(IV2TSN+IMAX-1-1),
707      $                    WORK(IV2TCS+IMAX-1-1), R )
708          ELSE IF( NU .LT. MU ) THEN
709             CALL DLARTGS( B12E(IMAX-1), B12D(IMAX), NU,
710      $                    WORK(IV2TCS+IMAX-1-1), WORK(IV2TSN+IMAX-1-1) )
711          ELSE
712             CALL DLARTGS( B22E(IMAX-1), B22D(IMAX), MU,
713      $                    WORK(IV2TCS+IMAX-1-1), WORK(IV2TSN+IMAX-1-1) )
714          END IF
715 *
716          TEMP = WORK(IV2TCS+IMAX-1-1)*B12E(IMAX-1+
717      $          WORK(IV2TSN+IMAX-1-1)*B12D(IMAX)
718          B12D(IMAX) = WORK(IV2TCS+IMAX-1-1)*B12D(IMAX) -
719      $                WORK(IV2TSN+IMAX-1-1)*B12E(IMAX-1)
720          B12E(IMAX-1= TEMP
721          TEMP = WORK(IV2TCS+IMAX-1-1)*B22E(IMAX-1+
722      $          WORK(IV2TSN+IMAX-1-1)*B22D(IMAX)
723          B22D(IMAX) = WORK(IV2TCS+IMAX-1-1)*B22D(IMAX) -
724      $                WORK(IV2TSN+IMAX-1-1)*B22E(IMAX-1)
725          B22E(IMAX-1= TEMP
726 *
727 *        Update singular vectors: DLASR applies the rotations held in
728 *        WORK for indices IMIN..IMAX to each requested factor
729          IF( WANTU1 ) THEN
730             IF( COLMAJOR ) THEN
731                CALL DLASR( 'R', 'V', 'F', P, IMAX-IMIN+1,
732      $                     WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1),
733      $                     U1(1,IMIN), LDU1 )
734             ELSE
735                CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, P,
736      $                     WORK(IU1CS+IMIN-1), WORK(IU1SN+IMIN-1),
737      $                     U1(IMIN,1), LDU1 )
738             END IF
739          END IF
740          IF( WANTU2 ) THEN
741             IF( COLMAJOR ) THEN
742                CALL DLASR( 'R', 'V', 'F', M-P, IMAX-IMIN+1,
743      $                     WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1),
744      $                     U2(1,IMIN), LDU2 )
745             ELSE
746                CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-P,
747      $                     WORK(IU2CS+IMIN-1), WORK(IU2SN+IMIN-1),
748      $                     U2(IMIN,1), LDU2 )
749             END IF
750          END IF
751          IF( WANTV1T ) THEN
752             IF( COLMAJOR ) THEN
753                CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, Q,
754      $                     WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1),
755      $                     V1T(IMIN,1), LDV1T )
756             ELSE
757                CALL DLASR( 'R', 'V', 'F', Q, IMAX-IMIN+1,
758      $                     WORK(IV1TCS+IMIN-1), WORK(IV1TSN+IMIN-1),
759      $                     V1T(1,IMIN), LDV1T )
760             END IF
761          END IF
762          IF( WANTV2T ) THEN
763             IF( COLMAJOR ) THEN
764                CALL DLASR( 'L', 'V', 'F', IMAX-IMIN+1, M-Q,
765      $                     WORK(IV2TCS+IMIN-1), WORK(IV2TSN+IMIN-1),
766      $                     V2T(IMIN,1), LDV2T )
767             ELSE
768                CALL DLASR( 'R', 'V', 'F', M-Q, IMAX-IMIN+1,
769      $                     WORK(IV2TCS+IMIN-1), WORK(IV2TSN+IMIN-1),
770      $                     V2T(1,IMIN), LDV2T )
771             END IF
772          END IF
773 *
774 *        Fix signs on B11(IMAX-1,IMAX) and B21(IMAX-1,IMAX)
775 *        (if WANTV1T, V1T row/column IMAX is scaled by NEGONECOMPLEX)
776          IF( B11E(IMAX-1)+B21E(IMAX-1) .GT. 0 ) THEN
777             B11D(IMAX) = -B11D(IMAX)
778             B21D(IMAX) = -B21D(IMAX)
779             IF( WANTV1T ) THEN
780                IF( COLMAJOR ) THEN
781                   CALL DSCAL( Q, NEGONECOMPLEX, V1T(IMAX,1), LDV1T )
782                ELSE
783                   CALL DSCAL( Q, NEGONECOMPLEX, V1T(1,IMAX), 1 )
784                END IF
785             END IF
786          END IF
787 *
788 *        Compute THETA(IMAX)
789 *        ATAN2 of absolute values keeps THETA(IMAX) in [0, pi/2]
790          X1 = COS(PHI(IMAX-1))*B11D(IMAX) +
791      $        SIN(PHI(IMAX-1))*B12E(IMAX-1)
792          Y1 = COS(PHI(IMAX-1))*B21D(IMAX) +
793      $        SIN(PHI(IMAX-1))*B22E(IMAX-1)
794 *
795          THETA(IMAX) = ATAN2( ABS(Y1), ABS(X1) )
796 *
797 *        Fix signs on B11(IMAX,IMAX), B12(IMAX,IMAX-1), B21(IMAX,IMAX),
798 *        and B22(IMAX,IMAX-1)
799 *        (if wanted, U1/U2 column/row IMAX is scaled by NEGONECOMPLEX)
800          IF( B11D(IMAX)+B12E(IMAX-1) .LT. 0 ) THEN
801             B12D(IMAX) = -B12D(IMAX)
802             IF( WANTU1 ) THEN
803                IF( COLMAJOR ) THEN
804                   CALL DSCAL( P, NEGONECOMPLEX, U1(1,IMAX), 1 )
805                ELSE
806                   CALL DSCAL( P, NEGONECOMPLEX, U1(IMAX,1), LDU1 )
807                END IF
808             END IF
809          END IF
810          IF( B21D(IMAX)+B22E(IMAX-1) .GT. 0 ) THEN
811             B22D(IMAX) = -B22D(IMAX)
812             IF( WANTU2 ) THEN
813                IF( COLMAJOR ) THEN
814                   CALL DSCAL( M-P, NEGONECOMPLEX, U2(1,IMAX), 1 )
815                ELSE
816                   CALL DSCAL( M-P, NEGONECOMPLEX, U2(IMAX,1), LDU2 )
817                END IF
818             END IF
819          END IF
820 *
821 *        Fix signs on B12(IMAX,IMAX) and B22(IMAX,IMAX)
822 *
823          IF( B12D(IMAX)+B22D(IMAX) .LT. 0 ) THEN
824             IF( WANTV2T ) THEN
825                IF( COLMAJOR ) THEN
826                   CALL DSCAL( M-Q, NEGONECOMPLEX, V2T(IMAX,1), LDV2T )
827                ELSE
828                   CALL DSCAL( M-Q, NEGONECOMPLEX, V2T(1,IMAX), 1 )
829                END IF
830             END IF
831          END IF
832 *
833 *        Test for negligible sines or cosines
834 *
835          DO I = IMIN, IMAX
836             IF( THETA(I) .LT. THRESH ) THEN
837                THETA(I) = ZERO
838             ELSE IF( THETA(I) .GT. PIOVER2-THRESH ) THEN
839                THETA(I) = PIOVER2
840             END IF
841          END DO
842          DO I = IMIN, IMAX-1
843             IF( PHI(I) .LT. THRESH ) THEN
844                PHI(I) = ZERO
845             ELSE IF( PHI(I) .GT. PIOVER2-THRESH ) THEN
846                PHI(I) = PIOVER2
847             END IF
848          END DO
849 *
850 *        Deflate
851 *        Drop IMAX past zero PHI(IMAX-1); extend IMIN over nonzero PHI
852          IF (IMAX .GT. 1) THEN
853             DO WHILE( PHI(IMAX-1) .EQ. ZERO )
854                IMAX = IMAX - 1
855                IF (IMAX .LE. 1) EXIT
856             END DO
857          END IF
858          IF( IMIN .GT. IMAX - 1 )
859      $      IMIN = IMAX - 1
860          IF (IMIN .GT. 1) THEN
861             DO WHILE (PHI(IMIN-1) .NE. ZERO)
862                 IMIN = IMIN - 1
863                 IF (IMIN .LE. 1) EXIT
864             END DO
865          END IF
866 *
867 *        Repeat main iteration loop
868 *
869       END DO
870 *
871 *     Postprocessing: order THETA from least to greatest
872 *
873       DO I = 1, Q
874 *
875          MINI = I
876          THETAMIN = THETA(I)
877          DO J = I+1, Q
878             IF( THETA(J) .LT. THETAMIN ) THEN
879                MINI = J
880                THETAMIN = THETA(J)
881             END IF
882          END DO
883 *
884          IF( MINI .NE. I ) THEN
885             THETA(MINI) = THETA(I)
886             THETA(I) = THETAMIN
887             IF( COLMAJOR ) THEN
888                IF( WANTU1 )
889      $            CALL DSWAP( P, U1(1,I), 1, U1(1,MINI), 1 )
890                IF( WANTU2 )
891      $            CALL DSWAP( M-P, U2(1,I), 1, U2(1,MINI), 1 )
892                IF( WANTV1T )
893      $            CALL DSWAP( Q, V1T(I,1), LDV1T, V1T(MINI,1), LDV1T )
894                IF( WANTV2T )
895      $            CALL DSWAP( M-Q, V2T(I,1), LDV2T, V2T(MINI,1),
896      $               LDV2T )
897             ELSE
898                IF( WANTU1 )
899      $            CALL DSWAP( P, U1(I,1), LDU1, U1(MINI,1), LDU1 )
900                IF( WANTU2 )
901      $            CALL DSWAP( M-P, U2(I,1), LDU2, U2(MINI,1), LDU2 )
902                IF( WANTV1T )
903      $            CALL DSWAP( Q, V1T(1,I), 1, V1T(1,MINI), 1 )
904                IF( WANTV2T )
905      $            CALL DSWAP( M-Q, V2T(1,I), 1, V2T(1,MINI), 1 )
906             END IF
907          END IF
908 *
909       END DO
910 *
911       RETURN
912 *
913 *     End of DBBCSD
914 *
915       END
916