1       SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
   2      $                   WORK, LWORK, INFO )
   3 *
   4 *  -- LAPACK driver routine (version 3.3.1) --
   5 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
   6 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
   7 *  -- April 2011                                                      --
   8 *
   9 *     .. Scalar Arguments ..
  10       CHARACTER          JOBU, JOBVT
  11       INTEGER            INFO, LDA, LDU, LDVT, LWORK, M, N
  12 *     ..
  13 *     .. Array Arguments ..
  14       DOUBLE PRECISION   A( LDA, * ), S( * ), U( LDU, * ),
  15      $                   VT( LDVT, * ), WORK( * )
  16 *     ..
  17 *
  18 *  Purpose
  19 *  =======
  20 *
  21 *  DGESVD computes the singular value decomposition (SVD) of a real
  22 *  M-by-N matrix A, optionally computing the left and/or right singular
  23 *  vectors. The SVD is written
  24 *
  25 *       A = U * SIGMA * transpose(V)
  26 *
  27 *  where SIGMA is an M-by-N matrix which is zero except for its
  28 *  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
  29 *  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
  30 *  are the singular values of A; they are real and non-negative, and
  31 *  are returned in descending order.  The first min(m,n) columns of
  32 *  U and V are the left and right singular vectors of A.
  33 *
  34 *  Note that the routine returns V**T, not V.
  35 *
  36 *  Arguments
  37 *  =========
  38 *
  39 *  JOBU    (input) CHARACTER*1
  40 *          Specifies options for computing all or part of the matrix U:
  41 *          = 'A':  all M columns of U are returned in array U:
  42 *          = 'S':  the first min(m,n) columns of U (the left singular
  43 *                  vectors) are returned in the array U;
  44 *          = 'O':  the first min(m,n) columns of U (the left singular
  45 *                  vectors) are overwritten on the array A;
  46 *          = 'N':  no columns of U (no left singular vectors) are
  47 *                  computed.
  48 *
  49 *  JOBVT   (input) CHARACTER*1
  50 *          Specifies options for computing all or part of the matrix
  51 *          V**T:
  52 *          = 'A':  all N rows of V**T are returned in the array VT;
  53 *          = 'S':  the first min(m,n) rows of V**T (the right singular
  54 *                  vectors) are returned in the array VT;
  55 *          = 'O':  the first min(m,n) rows of V**T (the right singular
  56 *                  vectors) are overwritten on the array A;
  57 *          = 'N':  no rows of V**T (no right singular vectors) are
  58 *                  computed.
  59 *
  60 *          JOBVT and JOBU cannot both be 'O'.
  61 *
  62 *  M       (input) INTEGER
  63 *          The number of rows of the input matrix A.  M >= 0.
  64 *
  65 *  N       (input) INTEGER
  66 *          The number of columns of the input matrix A.  N >= 0.
  67 *
  68 *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
  69 *          On entry, the M-by-N matrix A.
  70 *          On exit,
  71 *          if JOBU = 'O',  A is overwritten with the first min(m,n)
  72 *                          columns of U (the left singular vectors,
  73 *                          stored columnwise);
  74 *          if JOBVT = 'O', A is overwritten with the first min(m,n)
  75 *                          rows of V**T (the right singular vectors,
  76 *                          stored rowwise);
  77 *          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
  78 *                          are destroyed.
  79 *
  80 *  LDA     (input) INTEGER
  81 *          The leading dimension of the array A.  LDA >= max(1,M).
  82 *
  83 *  S       (output) DOUBLE PRECISION array, dimension (min(M,N))
  84 *          The singular values of A, sorted so that S(i) >= S(i+1).
  85 *
  86 *  U       (output) DOUBLE PRECISION array, dimension (LDU,UCOL)
  87 *          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
  88 *          If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
  89 *          if JOBU = 'S', U contains the first min(m,n) columns of U
  90 *          (the left singular vectors, stored columnwise);
  91 *          if JOBU = 'N' or 'O', U is not referenced.
  92 *
  93 *  LDU     (input) INTEGER
  94 *          The leading dimension of the array U.  LDU >= 1; if
  95 *          JOBU = 'S' or 'A', LDU >= M.
  96 *
  97 *  VT      (output) DOUBLE PRECISION array, dimension (LDVT,N)
  98 *          If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
  99 *          V**T;
 100 *          if JOBVT = 'S', VT contains the first min(m,n) rows of
 101 *          V**T (the right singular vectors, stored rowwise);
 102 *          if JOBVT = 'N' or 'O', VT is not referenced.
 103 *
 104 *  LDVT    (input) INTEGER
 105 *          The leading dimension of the array VT.  LDVT >= 1; if
 106 *          JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
 107 *
 108 *  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
 109 *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
 110 *          if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
 111 *          superdiagonal elements of an upper bidiagonal matrix B
 112 *          whose diagonal is in S (not necessarily sorted). B
 113 *          satisfies A = U * B * VT, so it has the same singular values
 114 *          as A, and singular vectors related by U and VT.
 115 *
 116 *  LWORK   (input) INTEGER
 117 *          The dimension of the array WORK.
 118 *          LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
 119 *             - PATH 1  (M much larger than N, JOBU='N') 
 120 *             - PATH 1t (N much larger than M, JOBVT='N')
 121 *          LWORK >= MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) for the other paths
 122 *          For good performance, LWORK should generally be larger.
 123 *
 124 *          If LWORK = -1, then a workspace query is assumed; the routine
 125 *          only calculates the optimal size of the WORK array, returns
 126 *          this value as the first entry of the WORK array, and no error
 127 *          message related to LWORK is issued by XERBLA.
 128 *
 129 *  INFO    (output) INTEGER
 130 *          = 0:  successful exit.
 131 *          < 0:  if INFO = -i, the i-th argument had an illegal value.
 132 *          > 0:  if DBDSQR did not converge, INFO specifies how many
 133 *                superdiagonals of an intermediate bidiagonal form B
 134 *                did not converge to zero. See the description of WORK
 135 *                above for details.
 136 *
 137 *  =====================================================================
 138 *
 139 *     .. Parameters ..
 140       DOUBLE PRECISION   ZERO, ONE
 141       PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0 )
 142 *     ..
 143 *     .. Local Scalars ..
 144       LOGICAL            LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
 145      $                   WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
 146       INTEGER            BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
 147      $                   ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
 148      $                   MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
 149      $                   NRVT, WRKBL
 150       DOUBLE PRECISION   ANRM, BIGNUM, EPS, SMLNUM
 151 *     ..
 152 *     .. Local Arrays ..
 153       DOUBLE PRECISION   DUM( 1 )
 154 *     ..
 155 *     .. External Subroutines ..
 156       EXTERNAL           DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
 157      $                   DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
 158      $                   XERBLA
 159 *     ..
 160 *     .. External Functions ..
 161       LOGICAL            LSAME
 162       INTEGER            ILAENV
 163       DOUBLE PRECISION   DLAMCH, DLANGE
 164       EXTERNAL           LSAME, ILAENV, DLAMCH, DLANGE
 165 *     ..
 166 *     .. Intrinsic Functions ..
 167       INTRINSIC          MAXMINSQRT
 168 *     ..
 169 *     .. Executable Statements ..
 170 *
 171 *     Test the input arguments
 172 *
 173       INFO = 0
 174       MINMN = MIN( M, N )
 175       WNTUA = LSAME( JOBU, 'A' )
 176       WNTUS = LSAME( JOBU, 'S' )
 177       WNTUAS = WNTUA .OR. WNTUS
 178       WNTUO = LSAME( JOBU, 'O' )
 179       WNTUN = LSAME( JOBU, 'N' )
 180       WNTVA = LSAME( JOBVT, 'A' )
 181       WNTVS = LSAME( JOBVT, 'S' )
 182       WNTVAS = WNTVA .OR. WNTVS
 183       WNTVO = LSAME( JOBVT, 'O' )
 184       WNTVN = LSAME( JOBVT, 'N' )
 185       LQUERY = ( LWORK.EQ.-1 )
 186 *
 187       IF.NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
 188          INFO = -1
 189       ELSE IF.NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
 190      $         ( WNTVO .AND. WNTUO ) ) THEN
 191          INFO = -2
 192       ELSE IF( M.LT.0 ) THEN
 193          INFO = -3
 194       ELSE IF( N.LT.0 ) THEN
 195          INFO = -4
 196       ELSE IF( LDA.LT.MAX1, M ) ) THEN
 197          INFO = -6
 198       ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
 199          INFO = -9
 200       ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
 201      $         ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
 202          INFO = -11
 203       END IF
 204 *
 205 *     Compute workspace
 206 *      (Note: Comments in the code beginning "Workspace:" describe the
 207 *       minimal amount of workspace needed at that point in the code,
 208 *       as well as the preferred amount for good performance.
 209 *       NB refers to the optimal block size for the immediately
 210 *       following subroutine, as returned by ILAENV.)
 211 *
 212       IF( INFO.EQ.0 ) THEN
 213          MINWRK = 1
 214          MAXWRK = 1
 215          IF( M.GE..AND. MINMN.GT.0 ) THEN
 216 *
 217 *           Compute space needed for DBDSQR
 218 *
 219             MNTHR = ILAENV( 6'DGESVD', JOBU // JOBVT, M, N, 00 )
 220             BDSPAC = 5*N
 221             IF( M.GE.MNTHR ) THEN
 222                IF( WNTUN ) THEN
 223 *
 224 *                 Path 1 (M much larger than N, JOBU='N')
 225 *
 226                   MAXWRK = N + N*ILAENV( 1'DGEQRF'' ', M, N, -1,
 227      $                     -1 )
 228                   MAXWRK = MAX( MAXWRK, 3*N+2*N*
 229      $                     ILAENV( 1'DGEBRD'' ', N, N, -1-1 ) )
 230                   IF( WNTVO .OR. WNTVAS )
 231      $               MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
 232      $                        ILAENV( 1'DORGBR''P', N, N, N, -1 ) )
 233                   MAXWRK = MAX( MAXWRK, BDSPAC )
 234                   MINWRK = MAX4*N, BDSPAC )
 235                ELSE IF( WNTUO .AND. WNTVN ) THEN
 236 *
 237 *                 Path 2 (M much larger than N, JOBU='O', JOBVT='N')
 238 *
 239                   WRKBL = N + N*ILAENV( 1'DGEQRF'' ', M, N, -1-1 )
 240                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1'DORGQR'' ', M,
 241      $                    N, N, -1 ) )
 242                   WRKBL = MAX( WRKBL, 3*N+2*N*
 243      $                    ILAENV( 1'DGEBRD'' ', N, N, -1-1 ) )
 244                   WRKBL = MAX( WRKBL, 3*N+N*
 245      $                    ILAENV( 1'DORGBR''Q', N, N, N, -1 ) )
 246                   WRKBL = MAX( WRKBL, BDSPAC )
 247                   MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
 248                   MINWRK = MAX3*N+M, BDSPAC )
 249                ELSE IF( WNTUO .AND. WNTVAS ) THEN
 250 *
 251 *                 Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
 252 *                 'A')
 253 *
 254                   WRKBL = N + N*ILAENV( 1'DGEQRF'' ', M, N, -1-1 )
 255                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1'DORGQR'' ', M,
 256      $                    N, N, -1 ) )
 257                   WRKBL = MAX( WRKBL, 3*N+2*N*
 258      $                    ILAENV( 1'DGEBRD'' ', N, N, -1-1 ) )
 259                   WRKBL = MAX( WRKBL, 3*N+N*
 260      $                    ILAENV( 1'DORGBR''Q', N, N, N, -1 ) )
 261                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
 262      $                    ILAENV( 1'DORGBR''P', N, N, N, -1 ) )
 263                   WRKBL = MAX( WRKBL, BDSPAC )
 264                   MAXWRK = MAX( N*N+WRKBL, N*N+M*N+N )
 265                   MINWRK = MAX3*N+M, BDSPAC )
 266                ELSE IF( WNTUS .AND. WNTVN ) THEN
 267 *
 268 *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
 269 *
 270                   WRKBL = N + N*ILAENV( 1'DGEQRF'' ', M, N, -1-1 )
 271                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1'DORGQR'' ', M,
 272      $                    N, N, -1 ) )
 273                   WRKBL = MAX( WRKBL, 3*N+2*N*
 274      $                    ILAENV( 1'DGEBRD'' ', N, N, -1-1 ) )
 275                   WRKBL = MAX( WRKBL, 3*N+N*
 276      $                    ILAENV( 1'DORGBR''Q', N, N, N, -1 ) )
 277                   WRKBL = MAX( WRKBL, BDSPAC )
 278                   MAXWRK = N*+ WRKBL
 279                   MINWRK = MAX3*N+M, BDSPAC )
 280                ELSE IF( WNTUS .AND. WNTVO ) THEN
 281 *
 282 *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
 283 *
 284                   WRKBL = N + N*ILAENV( 1'DGEQRF'' ', M, N, -1-1 )
 285                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1'DORGQR'' ', M,
 286      $                    N, N, -1 ) )
 287                   WRKBL = MAX( WRKBL, 3*N+2*N*
 288      $                    ILAENV( 1'DGEBRD'' ', N, N, -1-1 ) )
 289                   WRKBL = MAX( WRKBL, 3*N+N*
 290      $                    ILAENV( 1'DORGBR''Q', N, N, N, -1 ) )
 291                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
 292      $                    ILAENV( 1'DORGBR''P', N, N, N, -1 ) )
 293                   WRKBL = MAX( WRKBL, BDSPAC )
 294                   MAXWRK = 2*N*+ WRKBL
 295                   MINWRK = MAX3*N+M, BDSPAC )
 296                ELSE IF( WNTUS .AND. WNTVAS ) THEN
 297 *
 298 *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
 299 *                 'A')
 300 *
 301                   WRKBL = N + N*ILAENV( 1'DGEQRF'' ', M, N, -1-1 )
 302                   WRKBL = MAX( WRKBL, N+N*ILAENV( 1'DORGQR'' ', M,
 303      $                    N, N, -1 ) )
 304                   WRKBL = MAX( WRKBL, 3*N+2*N*
 305      $                    ILAENV( 1'DGEBRD'' ', N, N, -1-1 ) )
 306                   WRKBL = MAX( WRKBL, 3*N+N*
 307      $                    ILAENV( 1'DORGBR''Q', N, N, N, -1 ) )
 308                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
 309      $                    ILAENV( 1'DORGBR''P', N, N, N, -1 ) )
 310                   WRKBL = MAX( WRKBL, BDSPAC )
 311                   MAXWRK = N*+ WRKBL
 312                   MINWRK = MAX3*N+M, BDSPAC )
 313                ELSE IF( WNTUA .AND. WNTVN ) THEN
 314 *
 315 *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
 316 *
 317                   WRKBL = N + N*ILAENV( 1'DGEQRF'' ', M, N, -1-1 )
 318                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1'DORGQR'' ', M,
 319      $                    M, N, -1 ) )
 320                   WRKBL = MAX( WRKBL, 3*N+2*N*
 321      $                    ILAENV( 1'DGEBRD'' ', N, N, -1-1 ) )
 322                   WRKBL = MAX( WRKBL, 3*N+N*
 323      $                    ILAENV( 1'DORGBR''Q', N, N, N, -1 ) )
 324                   WRKBL = MAX( WRKBL, BDSPAC )
 325                   MAXWRK = N*+ WRKBL
 326                   MINWRK = MAX3*N+M, BDSPAC )
 327                ELSE IF( WNTUA .AND. WNTVO ) THEN
 328 *
 329 *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
 330 *
 331                   WRKBL = N + N*ILAENV( 1'DGEQRF'' ', M, N, -1-1 )
 332                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1'DORGQR'' ', M,
 333      $                    M, N, -1 ) )
 334                   WRKBL = MAX( WRKBL, 3*N+2*N*
 335      $                    ILAENV( 1'DGEBRD'' ', N, N, -1-1 ) )
 336                   WRKBL = MAX( WRKBL, 3*N+N*
 337      $                    ILAENV( 1'DORGBR''Q', N, N, N, -1 ) )
 338                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
 339      $                    ILAENV( 1'DORGBR''P', N, N, N, -1 ) )
 340                   WRKBL = MAX( WRKBL, BDSPAC )
 341                   MAXWRK = 2*N*+ WRKBL
 342                   MINWRK = MAX3*N+M, BDSPAC )
 343                ELSE IF( WNTUA .AND. WNTVAS ) THEN
 344 *
 345 *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
 346 *                 'A')
 347 *
 348                   WRKBL = N + N*ILAENV( 1'DGEQRF'' ', M, N, -1-1 )
 349                   WRKBL = MAX( WRKBL, N+M*ILAENV( 1'DORGQR'' ', M,
 350      $                    M, N, -1 ) )
 351                   WRKBL = MAX( WRKBL, 3*N+2*N*
 352      $                    ILAENV( 1'DGEBRD'' ', N, N, -1-1 ) )
 353                   WRKBL = MAX( WRKBL, 3*N+N*
 354      $                    ILAENV( 1'DORGBR''Q', N, N, N, -1 ) )
 355                   WRKBL = MAX( WRKBL, 3*N+( N-1 )*
 356      $                    ILAENV( 1'DORGBR''P', N, N, N, -1 ) )
 357                   WRKBL = MAX( WRKBL, BDSPAC )
 358                   MAXWRK = N*+ WRKBL
 359                   MINWRK = MAX3*N+M, BDSPAC )
 360                END IF
 361             ELSE
 362 *
 363 *              Path 10 (M at least N, but not much larger)
 364 *
 365                MAXWRK = 3*+ ( M+N )*ILAENV( 1'DGEBRD'' ', M, N,
 366      $                  -1-1 )
 367                IF( WNTUS .OR. WNTUO )
 368      $            MAXWRK = MAX( MAXWRK, 3*N+N*
 369      $                     ILAENV( 1'DORGBR''Q', M, N, N, -1 ) )
 370                IF( WNTUA )
 371      $            MAXWRK = MAX( MAXWRK, 3*N+M*
 372      $                     ILAENV( 1'DORGBR''Q', M, M, N, -1 ) )
 373                IF.NOT.WNTVN )
 374      $            MAXWRK = MAX( MAXWRK, 3*N+( N-1 )*
 375      $                     ILAENV( 1'DORGBR''P', N, N, N, -1 ) )
 376                MAXWRK = MAX( MAXWRK, BDSPAC )
 377                MINWRK = MAX3*N+M, BDSPAC )
 378             END IF
 379          ELSE IF( MINMN.GT.0 ) THEN
 380 *
 381 *           Compute space needed for DBDSQR
 382 *
 383             MNTHR = ILAENV( 6'DGESVD', JOBU // JOBVT, M, N, 00 )
 384             BDSPAC = 5*M
 385             IF( N.GE.MNTHR ) THEN
 386                IF( WNTVN ) THEN
 387 *
 388 *                 Path 1t(N much larger than M, JOBVT='N')
 389 *
 390                   MAXWRK = M + M*ILAENV( 1'DGELQF'' ', M, N, -1,
 391      $                     -1 )
 392                   MAXWRK = MAX( MAXWRK, 3*M+2*M*
 393      $                     ILAENV( 1'DGEBRD'' ', M, M, -1-1 ) )
 394                   IF( WNTUO .OR. WNTUAS )
 395      $               MAXWRK = MAX( MAXWRK, 3*M+M*
 396      $                        ILAENV( 1'DORGBR''Q', M, M, M, -1 ) )
 397                   MAXWRK = MAX( MAXWRK, BDSPAC )
 398                   MINWRK = MAX4*M, BDSPAC )
 399                ELSE IF( WNTVO .AND. WNTUN ) THEN
 400 *
 401 *                 Path 2t(N much larger than M, JOBU='N', JOBVT='O')
 402 *
 403                   WRKBL = M + M*ILAENV( 1'DGELQF'' ', M, N, -1-1 )
 404                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1'DORGLQ'' ', M,
 405      $                    N, M, -1 ) )
 406                   WRKBL = MAX( WRKBL, 3*M+2*M*
 407      $                    ILAENV( 1'DGEBRD'' ', M, M, -1-1 ) )
 408                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
 409      $                    ILAENV( 1'DORGBR''P', M, M, M, -1 ) )
 410                   WRKBL = MAX( WRKBL, BDSPAC )
 411                   MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
 412                   MINWRK = MAX3*M+N, BDSPAC )
 413                ELSE IF( WNTVO .AND. WNTUAS ) THEN
 414 *
 415 *                 Path 3t(N much larger than M, JOBU='S' or 'A',
 416 *                 JOBVT='O')
 417 *
 418                   WRKBL = M + M*ILAENV( 1'DGELQF'' ', M, N, -1-1 )
 419                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1'DORGLQ'' ', M,
 420      $                    N, M, -1 ) )
 421                   WRKBL = MAX( WRKBL, 3*M+2*M*
 422      $                    ILAENV( 1'DGEBRD'' ', M, M, -1-1 ) )
 423                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
 424      $                    ILAENV( 1'DORGBR''P', M, M, M, -1 ) )
 425                   WRKBL = MAX( WRKBL, 3*M+M*
 426      $                    ILAENV( 1'DORGBR''Q', M, M, M, -1 ) )
 427                   WRKBL = MAX( WRKBL, BDSPAC )
 428                   MAXWRK = MAX( M*M+WRKBL, M*M+M*N+M )
 429                   MINWRK = MAX3*M+N, BDSPAC )
 430                ELSE IF( WNTVS .AND. WNTUN ) THEN
 431 *
 432 *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
 433 *
 434                   WRKBL = M + M*ILAENV( 1'DGELQF'' ', M, N, -1-1 )
 435                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1'DORGLQ'' ', M,
 436      $                    N, M, -1 ) )
 437                   WRKBL = MAX( WRKBL, 3*M+2*M*
 438      $                    ILAENV( 1'DGEBRD'' ', M, M, -1-1 ) )
 439                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
 440      $                    ILAENV( 1'DORGBR''P', M, M, M, -1 ) )
 441                   WRKBL = MAX( WRKBL, BDSPAC )
 442                   MAXWRK = M*+ WRKBL
 443                   MINWRK = MAX3*M+N, BDSPAC )
 444                ELSE IF( WNTVS .AND. WNTUO ) THEN
 445 *
 446 *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
 447 *
 448                   WRKBL = M + M*ILAENV( 1'DGELQF'' ', M, N, -1-1 )
 449                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1'DORGLQ'' ', M,
 450      $                    N, M, -1 ) )
 451                   WRKBL = MAX( WRKBL, 3*M+2*M*
 452      $                    ILAENV( 1'DGEBRD'' ', M, M, -1-1 ) )
 453                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
 454      $                    ILAENV( 1'DORGBR''P', M, M, M, -1 ) )
 455                   WRKBL = MAX( WRKBL, 3*M+M*
 456      $                    ILAENV( 1'DORGBR''Q', M, M, M, -1 ) )
 457                   WRKBL = MAX( WRKBL, BDSPAC )
 458                   MAXWRK = 2*M*+ WRKBL
 459                   MINWRK = MAX3*M+N, BDSPAC )
 460                ELSE IF( WNTVS .AND. WNTUAS ) THEN
 461 *
 462 *                 Path 6t(N much larger than M, JOBU='S' or 'A',
 463 *                 JOBVT='S')
 464 *
 465                   WRKBL = M + M*ILAENV( 1'DGELQF'' ', M, N, -1-1 )
 466                   WRKBL = MAX( WRKBL, M+M*ILAENV( 1'DORGLQ'' ', M,
 467      $                    N, M, -1 ) )
 468                   WRKBL = MAX( WRKBL, 3*M+2*M*
 469      $                    ILAENV( 1'DGEBRD'' ', M, M, -1-1 ) )
 470                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
 471      $                    ILAENV( 1'DORGBR''P', M, M, M, -1 ) )
 472                   WRKBL = MAX( WRKBL, 3*M+M*
 473      $                    ILAENV( 1'DORGBR''Q', M, M, M, -1 ) )
 474                   WRKBL = MAX( WRKBL, BDSPAC )
 475                   MAXWRK = M*+ WRKBL
 476                   MINWRK = MAX3*M+N, BDSPAC )
 477                ELSE IF( WNTVA .AND. WNTUN ) THEN
 478 *
 479 *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
 480 *
 481                   WRKBL = M + M*ILAENV( 1'DGELQF'' ', M, N, -1-1 )
 482                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1'DORGLQ'' ', N,
 483      $                    N, M, -1 ) )
 484                   WRKBL = MAX( WRKBL, 3*M+2*M*
 485      $                    ILAENV( 1'DGEBRD'' ', M, M, -1-1 ) )
 486                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
 487      $                    ILAENV( 1'DORGBR''P', M, M, M, -1 ) )
 488                   WRKBL = MAX( WRKBL, BDSPAC )
 489                   MAXWRK = M*+ WRKBL
 490                   MINWRK = MAX3*M+N, BDSPAC )
 491                ELSE IF( WNTVA .AND. WNTUO ) THEN
 492 *
 493 *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
 494 *
 495                   WRKBL = M + M*ILAENV( 1'DGELQF'' ', M, N, -1-1 )
 496                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1'DORGLQ'' ', N,
 497      $                    N, M, -1 ) )
 498                   WRKBL = MAX( WRKBL, 3*M+2*M*
 499      $                    ILAENV( 1'DGEBRD'' ', M, M, -1-1 ) )
 500                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
 501      $                    ILAENV( 1'DORGBR''P', M, M, M, -1 ) )
 502                   WRKBL = MAX( WRKBL, 3*M+M*
 503      $                    ILAENV( 1'DORGBR''Q', M, M, M, -1 ) )
 504                   WRKBL = MAX( WRKBL, BDSPAC )
 505                   MAXWRK = 2*M*+ WRKBL
 506                   MINWRK = MAX3*M+N, BDSPAC )
 507                ELSE IF( WNTVA .AND. WNTUAS ) THEN
 508 *
 509 *                 Path 9t(N much larger than M, JOBU='S' or 'A',
 510 *                 JOBVT='A')
 511 *
 512                   WRKBL = M + M*ILAENV( 1'DGELQF'' ', M, N, -1-1 )
 513                   WRKBL = MAX( WRKBL, M+N*ILAENV( 1'DORGLQ'' ', N,
 514      $                    N, M, -1 ) )
 515                   WRKBL = MAX( WRKBL, 3*M+2*M*
 516      $                    ILAENV( 1'DGEBRD'' ', M, M, -1-1 ) )
 517                   WRKBL = MAX( WRKBL, 3*M+( M-1 )*
 518      $                    ILAENV( 1'DORGBR''P', M, M, M, -1 ) )
 519                   WRKBL = MAX( WRKBL, 3*M+M*
 520      $                    ILAENV( 1'DORGBR''Q', M, M, M, -1 ) )
 521                   WRKBL = MAX( WRKBL, BDSPAC )
 522                   MAXWRK = M*+ WRKBL
 523                   MINWRK = MAX3*M+N, BDSPAC )
 524                END IF
 525             ELSE
 526 *
 527 *              Path 10t(N greater than M, but not much larger)
 528 *
 529                MAXWRK = 3*+ ( M+N )*ILAENV( 1'DGEBRD'' ', M, N,
 530      $                  -1-1 )
 531                IF( WNTVS .OR. WNTVO )
 532      $            MAXWRK = MAX( MAXWRK, 3*M+M*
 533      $                     ILAENV( 1'DORGBR''P', M, N, M, -1 ) )
 534                IF( WNTVA )
 535      $            MAXWRK = MAX( MAXWRK, 3*M+N*
 536      $                     ILAENV( 1'DORGBR''P', N, N, M, -1 ) )
 537                IF.NOT.WNTUN )
 538      $            MAXWRK = MAX( MAXWRK, 3*M+( M-1 )*
 539      $                     ILAENV( 1'DORGBR''Q', M, M, M, -1 ) )
 540                MAXWRK = MAX( MAXWRK, BDSPAC )
 541                MINWRK = MAX3*M+N, BDSPAC )
 542             END IF
 543          END IF
 544          MAXWRK = MAX( MAXWRK, MINWRK )
 545          WORK( 1 ) = MAXWRK
 546 *
 547          IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
 548             INFO = -13
 549          END IF
 550       END IF
 551 *
 552       IF( INFO.NE.0 ) THEN
 553          CALL XERBLA( 'DGESVD'-INFO )
 554          RETURN
 555       ELSE IF( LQUERY ) THEN
 556          RETURN
 557       END IF
 558 *
 559 *     Quick return if possible
 560 *
 561       IF( M.EQ.0 .OR. N.EQ.0 ) THEN
 562          RETURN
 563       END IF
 564 *
 565 *     Get machine constants
 566 *
 567       EPS = DLAMCH( 'P' )
 568       SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
 569       BIGNUM = ONE / SMLNUM
 570 *
 571 *     Scale A if max element outside range [SMLNUM,BIGNUM]
 572 *
 573       ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
 574       ISCL = 0
 575       IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
 576          ISCL = 1
 577          CALL DLASCL( 'G'00, ANRM, SMLNUM, M, N, A, LDA, IERR )
 578       ELSE IF( ANRM.GT.BIGNUM ) THEN
 579          ISCL = 1
 580          CALL DLASCL( 'G'00, ANRM, BIGNUM, M, N, A, LDA, IERR )
 581       END IF
 582 *
 583       IF( M.GE.N ) THEN
 584 *
 585 *        A has at least as many rows as columns. If A has sufficiently
 586 *        more rows than columns, first reduce using the QR
 587 *        decomposition (if sufficient workspace available)
 588 *
 589          IF( M.GE.MNTHR ) THEN
 590 *
 591             IF( WNTUN ) THEN
 592 *
 593 *              Path 1 (M much larger than N, JOBU='N')
 594 *              No left singular vectors to be computed
 595 *
 596                ITAU = 1
 597                IWORK = ITAU + N
 598 *
 599 *              Compute A=Q*R
 600 *              (Workspace: need 2*N, prefer N+N*NB)
 601 *
 602                CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
 603      $                      LWORK-IWORK+1, IERR )
 604 *
 605 *              Zero out below R
 606 *
 607                CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 21 ), LDA )
 608                IE = 1
 609                ITAUQ = IE + N
 610                ITAUP = ITAUQ + N
 611                IWORK = ITAUP + N
 612 *
 613 *              Bidiagonalize R in A
 614 *              (Workspace: need 4*N, prefer 3*N+2*N*NB)
 615 *
 616                CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
 617      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
 618      $                      IERR )
 619                NCVT = 0
 620                IF( WNTVO .OR. WNTVAS ) THEN
 621 *
 622 *                 If right singular vectors desired, generate P'.
 623 *                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
 624 *
 625                   CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
 626      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 627                   NCVT = N
 628                END IF
 629                IWORK = IE + N
 630 *
 631 *              Perform bidiagonal QR iteration, computing right
 632 *              singular vectors of A in A if desired
 633 *              (Workspace: need BDSPAC)
 634 *
 635                CALL DBDSQR( 'U', N, NCVT, 00, S, WORK( IE ), A, LDA,
 636      $                      DUM, 1, DUM, 1, WORK( IWORK ), INFO )
 637 *
 638 *              If right singular vectors desired in VT, copy them there
 639 *
 640                IF( WNTVAS )
 641      $            CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
 642 *
 643             ELSE IF( WNTUO .AND. WNTVN ) THEN
 644 *
 645 *              Path 2 (M much larger than N, JOBU='O', JOBVT='N')
 646 *              N left singular vectors to be overwritten on A and
 647 *              no right singular vectors to be computed
 648 *
 649                IF( LWORK.GE.N*N+MAX4*N, BDSPAC ) ) THEN
 650 *
 651 *                 Sufficient workspace for a fast algorithm
 652 *
 653                   IR = 1
 654                   IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
 655 *
 656 *                    WORK(IU) is LDA by N, WORK(IR) is LDA by N
 657 *
 658                      LDWRKU = LDA
 659                      LDWRKR = LDA
 660                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
 661 *
 662 *                    WORK(IU) is LDA by N, WORK(IR) is N by N
 663 *
 664                      LDWRKU = LDA
 665                      LDWRKR = N
 666                   ELSE
 667 *
 668 *                    WORK(IU) is LDWRKU by N, WORK(IR) is N by N
 669 *
 670                      LDWRKU = ( LWORK-N*N-N ) / N
 671                      LDWRKR = N
 672                   END IF
 673                   ITAU = IR + LDWRKR*N
 674                   IWORK = ITAU + N
 675 *
 676 *                 Compute A=Q*R
 677 *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
 678 *
 679                   CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
 680      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 681 *
 682 *                 Copy R to WORK(IR) and zero out below it
 683 *
 684                   CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
 685                   CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
 686      $                         LDWRKR )
 687 *
 688 *                 Generate Q in A
 689 *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
 690 *
 691                   CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
 692      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 693                   IE = ITAU
 694                   ITAUQ = IE + N
 695                   ITAUP = ITAUQ + N
 696                   IWORK = ITAUP + N
 697 *
 698 *                 Bidiagonalize R in WORK(IR)
 699 *                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
 700 *
 701                   CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
 702      $                         WORK( ITAUQ ), WORK( ITAUP ),
 703      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 704 *
 705 *                 Generate left vectors bidiagonalizing R
 706 *                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
 707 *
 708                   CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
 709      $                         WORK( ITAUQ ), WORK( IWORK ),
 710      $                         LWORK-IWORK+1, IERR )
 711                   IWORK = IE + N
 712 *
 713 *                 Perform bidiagonal QR iteration, computing left
 714 *                 singular vectors of R in WORK(IR)
 715 *                 (Workspace: need N*N+BDSPAC)
 716 *
 717                   CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
 718      $                         WORK( IR ), LDWRKR, DUM, 1,
 719      $                         WORK( IWORK ), INFO )
 720                   IU = IE + N
 721 *
 722 *                 Multiply Q in A by left singular vectors of R in
 723 *                 WORK(IR), storing result in WORK(IU) and copying to A
 724 *                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
 725 *
 726                   DO 10 I = 1, M, LDWRKU
 727                      CHUNK = MIN( M-I+1, LDWRKU )
 728                      CALL DGEMM( 'N''N', CHUNK, N, N, ONE, A( I, 1 ),
 729      $                           LDA, WORK( IR ), LDWRKR, ZERO,
 730      $                           WORK( IU ), LDWRKU )
 731                      CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
 732      $                            A( I, 1 ), LDA )
 733    10             CONTINUE
 734 *
 735                ELSE
 736 *
 737 *                 Insufficient workspace for a fast algorithm
 738 *
 739                   IE = 1
 740                   ITAUQ = IE + N
 741                   ITAUP = ITAUQ + N
 742                   IWORK = ITAUP + N
 743 *
 744 *                 Bidiagonalize A
 745 *                 (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
 746 *
 747                   CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
 748      $                         WORK( ITAUQ ), WORK( ITAUP ),
 749      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 750 *
 751 *                 Generate left vectors bidiagonalizing A
 752 *                 (Workspace: need 4*N, prefer 3*N+N*NB)
 753 *
 754                   CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
 755      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 756                   IWORK = IE + N
 757 *
 758 *                 Perform bidiagonal QR iteration, computing left
 759 *                 singular vectors of A in A
 760 *                 (Workspace: need BDSPAC)
 761 *
 762                   CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
 763      $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
 764 *
 765                END IF
 766 *
 767             ELSE IF( WNTUO .AND. WNTVAS ) THEN
 768 *
 769 *              Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
 770 *              N left singular vectors to be overwritten on A and
 771 *              N right singular vectors to be computed in VT
 772 *
 773                IF( LWORK.GE.N*N+MAX4*N, BDSPAC ) ) THEN
 774 *
 775 *                 Sufficient workspace for a fast algorithm
 776 *
 777                   IR = 1
 778                   IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+LDA*N ) THEN
 779 *
 780 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by N
 781 *
 782                      LDWRKU = LDA
 783                      LDWRKR = LDA
 784                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+N )+N*N ) THEN
 785 *
 786 *                    WORK(IU) is LDA by N and WORK(IR) is N by N
 787 *
 788                      LDWRKU = LDA
 789                      LDWRKR = N
 790                   ELSE
 791 *
 792 *                    WORK(IU) is LDWRKU by N and WORK(IR) is N by N
 793 *
 794                      LDWRKU = ( LWORK-N*N-N ) / N
 795                      LDWRKR = N
 796                   END IF
 797                   ITAU = IR + LDWRKR*N
 798                   IWORK = ITAU + N
 799 *
 800 *                 Compute A=Q*R
 801 *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
 802 *
 803                   CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
 804      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 805 *
 806 *                 Copy R to VT, zeroing out below it
 807 *
 808                   CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
 809                   IF( N.GT.1 )
 810      $               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
 811      $                            VT( 21 ), LDVT )
 812 *
 813 *                 Generate Q in A
 814 *                 (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
 815 *
 816                   CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
 817      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 818                   IE = ITAU
 819                   ITAUQ = IE + N
 820                   ITAUP = ITAUQ + N
 821                   IWORK = ITAUP + N
 822 *
 823 *                 Bidiagonalize R in VT, copying result to WORK(IR)
 824 *                 (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
 825 *
 826                   CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
 827      $                         WORK( ITAUQ ), WORK( ITAUP ),
 828      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 829                   CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
 830 *
 831 *                 Generate left vectors bidiagonalizing R in WORK(IR)
 832 *                 (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
 833 *
 834                   CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
 835      $                         WORK( ITAUQ ), WORK( IWORK ),
 836      $                         LWORK-IWORK+1, IERR )
 837 *
 838 *                 Generate right vectors bidiagonalizing R in VT
 839 *                 (Workspace: need N*N+4*N-1, prefer N*N+3*N+(N-1)*NB)
 840 *
 841                   CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
 842      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 843                   IWORK = IE + N
 844 *
 845 *                 Perform bidiagonal QR iteration, computing left
 846 *                 singular vectors of R in WORK(IR) and computing right
 847 *                 singular vectors of R in VT
 848 *                 (Workspace: need N*N+BDSPAC)
 849 *
 850                   CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
 851      $                         WORK( IR ), LDWRKR, DUM, 1,
 852      $                         WORK( IWORK ), INFO )
 853                   IU = IE + N
 854 *
 855 *                 Multiply Q in A by left singular vectors of R in
 856 *                 WORK(IR), storing result in WORK(IU) and copying to A
 857 *                 (Workspace: need N*N+2*N, prefer N*N+M*N+N)
 858 *
 859                   DO 20 I = 1, M, LDWRKU
 860                      CHUNK = MIN( M-I+1, LDWRKU )
 861                      CALL DGEMM( 'N''N', CHUNK, N, N, ONE, A( I, 1 ),
 862      $                           LDA, WORK( IR ), LDWRKR, ZERO,
 863      $                           WORK( IU ), LDWRKU )
 864                      CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
 865      $                            A( I, 1 ), LDA )
 866    20             CONTINUE
 867 *
 868                ELSE
 869 *
 870 *                 Insufficient workspace for a fast algorithm
 871 *
 872                   ITAU = 1
 873                   IWORK = ITAU + N
 874 *
 875 *                 Compute A=Q*R
 876 *                 (Workspace: need 2*N, prefer N+N*NB)
 877 *
 878                   CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
 879      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 880 *
 881 *                 Copy R to VT, zeroing out below it
 882 *
 883                   CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
 884                   IF( N.GT.1 )
 885      $               CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
 886      $                            VT( 21 ), LDVT )
 887 *
 888 *                 Generate Q in A
 889 *                 (Workspace: need 2*N, prefer N+N*NB)
 890 *
 891                   CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
 892      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 893                   IE = ITAU
 894                   ITAUQ = IE + N
 895                   ITAUP = ITAUQ + N
 896                   IWORK = ITAUP + N
 897 *
 898 *                 Bidiagonalize R in VT
 899 *                 (Workspace: need 4*N, prefer 3*N+2*N*NB)
 900 *
 901                   CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
 902      $                         WORK( ITAUQ ), WORK( ITAUP ),
 903      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 904 *
 905 *                 Multiply Q in A by left vectors bidiagonalizing R
 906 *                 (Workspace: need 3*N+M, prefer 3*N+M*NB)
 907 *
 908                   CALL DORMBR( 'Q''R''N', M, N, N, VT, LDVT,
 909      $                         WORK( ITAUQ ), A, LDA, WORK( IWORK ),
 910      $                         LWORK-IWORK+1, IERR )
 911 *
 912 *                 Generate right vectors bidiagonalizing R in VT
 913 *                 (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
 914 *
 915                   CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
 916      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
 917                   IWORK = IE + N
 918 *
 919 *                 Perform bidiagonal QR iteration, computing left
 920 *                 singular vectors of A in A and computing right
 921 *                 singular vectors of A in VT
 922 *                 (Workspace: need BDSPAC)
 923 *
 924                   CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
 925      $                         A, LDA, DUM, 1, WORK( IWORK ), INFO )
 926 *
 927                END IF
 928 *
 929             ELSE IF( WNTUS ) THEN
 930 *
 931                IF( WNTVN ) THEN
 932 *
 933 *                 Path 4 (M much larger than N, JOBU='S', JOBVT='N')
 934 *                 N left singular vectors to be computed in U and
 935 *                 no right singular vectors to be computed
 936 *
 937                   IF( LWORK.GE.N*N+MAX4*N, BDSPAC ) ) THEN
 938 *
 939 *                    Sufficient workspace for a fast algorithm
 940 *
 941                      IR = 1
 942                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
 943 *
 944 *                       WORK(IR) is LDA by N
 945 *
 946                         LDWRKR = LDA
 947                      ELSE
 948 *
 949 *                       WORK(IR) is N by N
 950 *
 951                         LDWRKR = N
 952                      END IF
 953                      ITAU = IR + LDWRKR*N
 954                      IWORK = ITAU + N
 955 *
 956 *                    Compute A=Q*R
 957 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
 958 *
 959                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
 960      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
 961 *
 962 *                    Copy R to WORK(IR), zeroing out below it
 963 *
 964                      CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
 965      $                            LDWRKR )
 966                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
 967      $                            WORK( IR+1 ), LDWRKR )
 968 *
 969 *                    Generate Q in A
 970 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
 971 *
 972                      CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
 973      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
 974                      IE = ITAU
 975                      ITAUQ = IE + N
 976                      ITAUP = ITAUQ + N
 977                      IWORK = ITAUP + N
 978 *
 979 *                    Bidiagonalize R in WORK(IR)
 980 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
 981 *
 982                      CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
 983      $                            WORK( IE ), WORK( ITAUQ ),
 984      $                            WORK( ITAUP ), WORK( IWORK ),
 985      $                            LWORK-IWORK+1, IERR )
 986 *
 987 *                    Generate left vectors bidiagonalizing R in WORK(IR)
 988 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
 989 *
 990                      CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
 991      $                            WORK( ITAUQ ), WORK( IWORK ),
 992      $                            LWORK-IWORK+1, IERR )
 993                      IWORK = IE + N
 994 *
 995 *                    Perform bidiagonal QR iteration, computing left
 996 *                    singular vectors of R in WORK(IR)
 997 *                    (Workspace: need N*N+BDSPAC)
 998 *
 999                      CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1000      $                            1, WORK( IR ), LDWRKR, DUM, 1,
1001      $                            WORK( IWORK ), INFO )
1002 *
1003 *                    Multiply Q in A by left singular vectors of R in
1004 *                    WORK(IR), storing result in U
1005 *                    (Workspace: need N*N)
1006 *
1007                      CALL DGEMM( 'N''N', M, N, N, ONE, A, LDA,
1008      $                           WORK( IR ), LDWRKR, ZERO, U, LDU )
1009 *
1010                   ELSE
1011 *
1012 *                    Insufficient workspace for a fast algorithm
1013 *
1014                      ITAU = 1
1015                      IWORK = ITAU + N
1016 *
1017 *                    Compute A=Q*R, copying result to U
1018 *                    (Workspace: need 2*N, prefer N+N*NB)
1019 *
1020                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1021      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1022                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1023 *
1024 *                    Generate Q in U
1025 *                    (Workspace: need 2*N, prefer N+N*NB)
1026 *
1027                      CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1028      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1029                      IE = ITAU
1030                      ITAUQ = IE + N
1031                      ITAUP = ITAUQ + N
1032                      IWORK = ITAUP + N
1033 *
1034 *                    Zero out below R in A
1035 *
1036                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 21 ),
1037      $                            LDA )
1038 *
1039 *                    Bidiagonalize R in A
1040 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1041 *
1042                      CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1043      $                            WORK( ITAUQ ), WORK( ITAUP ),
1044      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1045 *
1046 *                    Multiply Q in U by left vectors bidiagonalizing R
1047 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1048 *
1049                      CALL DORMBR( 'Q''R''N', M, N, N, A, LDA,
1050      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1051      $                            LWORK-IWORK+1, IERR )
1052                      IWORK = IE + N
1053 *
1054 *                    Perform bidiagonal QR iteration, computing left
1055 *                    singular vectors of A in U
1056 *                    (Workspace: need BDSPAC)
1057 *
1058                      CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1059      $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1060      $                            INFO )
1061 *
1062                   END IF
1063 *
1064                ELSE IF( WNTVO ) THEN
1065 *
1066 *                 Path 5 (M much larger than N, JOBU='S', JOBVT='O')
1067 *                 N left singular vectors to be computed in U and
1068 *                 N right singular vectors to be overwritten on A
1069 *
1070                   IF( LWORK.GE.2*N*N+MAX4*N, BDSPAC ) ) THEN
1071 *
1072 *                    Sufficient workspace for a fast algorithm
1073 *
1074                      IU = 1
1075                      IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1076 *
1077 *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1078 *
1079                         LDWRKU = LDA
1080                         IR = IU + LDWRKU*N
1081                         LDWRKR = LDA
1082                      ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1083 *
1084 *                       WORK(IU) is LDA by N and WORK(IR) is N by N
1085 *
1086                         LDWRKU = LDA
1087                         IR = IU + LDWRKU*N
1088                         LDWRKR = N
1089                      ELSE
1090 *
1091 *                       WORK(IU) is N by N and WORK(IR) is N by N
1092 *
1093                         LDWRKU = N
1094                         IR = IU + LDWRKU*N
1095                         LDWRKR = N
1096                      END IF
1097                      ITAU = IR + LDWRKR*N
1098                      IWORK = ITAU + N
1099 *
1100 *                    Compute A=Q*R
1101 *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1102 *
1103                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1104      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1105 *
1106 *                    Copy R to WORK(IU), zeroing out below it
1107 *
1108                      CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1109      $                            LDWRKU )
1110                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1111      $                            WORK( IU+1 ), LDWRKU )
1112 *
1113 *                    Generate Q in A
1114 *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1115 *
1116                      CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1117      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1118                      IE = ITAU
1119                      ITAUQ = IE + N
1120                      ITAUP = ITAUQ + N
1121                      IWORK = ITAUP + N
1122 *
1123 *                    Bidiagonalize R in WORK(IU), copying result to
1124 *                    WORK(IR)
1125 *                    (Workspace: need 2*N*N+4*N,
1126 *                                prefer 2*N*N+3*N+2*N*NB)
1127 *
1128                      CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1129      $                            WORK( IE ), WORK( ITAUQ ),
1130      $                            WORK( ITAUP ), WORK( IWORK ),
1131      $                            LWORK-IWORK+1, IERR )
1132                      CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1133      $                            WORK( IR ), LDWRKR )
1134 *
1135 *                    Generate left bidiagonalizing vectors in WORK(IU)
1136 *                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1137 *
1138                      CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1139      $                            WORK( ITAUQ ), WORK( IWORK ),
1140      $                            LWORK-IWORK+1, IERR )
1141 *
1142 *                    Generate right bidiagonalizing vectors in WORK(IR)
1143 *                    (Workspace: need 2*N*N+4*N-1,
1144 *                                prefer 2*N*N+3*N+(N-1)*NB)
1145 *
1146                      CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1147      $                            WORK( ITAUP ), WORK( IWORK ),
1148      $                            LWORK-IWORK+1, IERR )
1149                      IWORK = IE + N
1150 *
1151 *                    Perform bidiagonal QR iteration, computing left
1152 *                    singular vectors of R in WORK(IU) and computing
1153 *                    right singular vectors of R in WORK(IR)
1154 *                    (Workspace: need 2*N*N+BDSPAC)
1155 *
1156                      CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1157      $                            WORK( IR ), LDWRKR, WORK( IU ),
1158      $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1159 *
1160 *                    Multiply Q in A by left singular vectors of R in
1161 *                    WORK(IU), storing result in U
1162 *                    (Workspace: need N*N)
1163 *
1164                      CALL DGEMM( 'N''N', M, N, N, ONE, A, LDA,
1165      $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1166 *
1167 *                    Copy right singular vectors of R to A
1168 *                    (Workspace: need N*N)
1169 *
1170                      CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1171      $                            LDA )
1172 *
1173                   ELSE
1174 *
1175 *                    Insufficient workspace for a fast algorithm
1176 *
1177                      ITAU = 1
1178                      IWORK = ITAU + N
1179 *
1180 *                    Compute A=Q*R, copying result to U
1181 *                    (Workspace: need 2*N, prefer N+N*NB)
1182 *
1183                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1184      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1185                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1186 *
1187 *                    Generate Q in U
1188 *                    (Workspace: need 2*N, prefer N+N*NB)
1189 *
1190                      CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1191      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1192                      IE = ITAU
1193                      ITAUQ = IE + N
1194                      ITAUP = ITAUQ + N
1195                      IWORK = ITAUP + N
1196 *
1197 *                    Zero out below R in A
1198 *
1199                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 21 ),
1200      $                            LDA )
1201 *
1202 *                    Bidiagonalize R in A
1203 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1204 *
1205                      CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1206      $                            WORK( ITAUQ ), WORK( ITAUP ),
1207      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1208 *
1209 *                    Multiply Q in U by left vectors bidiagonalizing R
1210 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1211 *
1212                      CALL DORMBR( 'Q''R''N', M, N, N, A, LDA,
1213      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1214      $                            LWORK-IWORK+1, IERR )
1215 *
1216 *                    Generate right vectors bidiagonalizing R in A
1217 *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1218 *
1219                      CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1220      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1221                      IWORK = IE + N
1222 *
1223 *                    Perform bidiagonal QR iteration, computing left
1224 *                    singular vectors of A in U and computing right
1225 *                    singular vectors of A in A
1226 *                    (Workspace: need BDSPAC)
1227 *
1228                      CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1229      $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1230      $                            INFO )
1231 *
1232                   END IF
1233 *
1234                ELSE IF( WNTVAS ) THEN
1235 *
1236 *                 Path 6 (M much larger than N, JOBU='S', JOBVT='S'
1237 *                         or 'A')
1238 *                 N left singular vectors to be computed in U and
1239 *                 N right singular vectors to be computed in VT
1240 *
1241                   IF( LWORK.GE.N*N+MAX4*N, BDSPAC ) ) THEN
1242 *
1243 *                    Sufficient workspace for a fast algorithm
1244 *
1245                      IU = 1
1246                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
1247 *
1248 *                       WORK(IU) is LDA by N
1249 *
1250                         LDWRKU = LDA
1251                      ELSE
1252 *
1253 *                       WORK(IU) is N by N
1254 *
1255                         LDWRKU = N
1256                      END IF
1257                      ITAU = IU + LDWRKU*N
1258                      IWORK = ITAU + N
1259 *
1260 *                    Compute A=Q*R
1261 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1262 *
1263                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1264      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1265 *
1266 *                    Copy R to WORK(IU), zeroing out below it
1267 *
1268                      CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1269      $                            LDWRKU )
1270                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1271      $                            WORK( IU+1 ), LDWRKU )
1272 *
1273 *                    Generate Q in A
1274 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1275 *
1276                      CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
1277      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1278                      IE = ITAU
1279                      ITAUQ = IE + N
1280                      ITAUP = ITAUQ + N
1281                      IWORK = ITAUP + N
1282 *
1283 *                    Bidiagonalize R in WORK(IU), copying result to VT
1284 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1285 *
1286                      CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1287      $                            WORK( IE ), WORK( ITAUQ ),
1288      $                            WORK( ITAUP ), WORK( IWORK ),
1289      $                            LWORK-IWORK+1, IERR )
1290                      CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1291      $                            LDVT )
1292 *
1293 *                    Generate left bidiagonalizing vectors in WORK(IU)
1294 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1295 *
1296                      CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1297      $                            WORK( ITAUQ ), WORK( IWORK ),
1298      $                            LWORK-IWORK+1, IERR )
1299 *
1300 *                    Generate right bidiagonalizing vectors in VT
1301 *                    (Workspace: need N*N+4*N-1,
1302 *                                prefer N*N+3*N+(N-1)*NB)
1303 *
1304                      CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1305      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1306                      IWORK = IE + N
1307 *
1308 *                    Perform bidiagonal QR iteration, computing left
1309 *                    singular vectors of R in WORK(IU) and computing
1310 *                    right singular vectors of R in VT
1311 *                    (Workspace: need N*N+BDSPAC)
1312 *
1313                      CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1314      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1315      $                            WORK( IWORK ), INFO )
1316 *
1317 *                    Multiply Q in A by left singular vectors of R in
1318 *                    WORK(IU), storing result in U
1319 *                    (Workspace: need N*N)
1320 *
1321                      CALL DGEMM( 'N''N', M, N, N, ONE, A, LDA,
1322      $                           WORK( IU ), LDWRKU, ZERO, U, LDU )
1323 *
1324                   ELSE
1325 *
1326 *                    Insufficient workspace for a fast algorithm
1327 *
1328                      ITAU = 1
1329                      IWORK = ITAU + N
1330 *
1331 *                    Compute A=Q*R, copying result to U
1332 *                    (Workspace: need 2*N, prefer N+N*NB)
1333 *
1334                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1335      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1336                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1337 *
1338 *                    Generate Q in U
1339 *                    (Workspace: need 2*N, prefer N+N*NB)
1340 *
1341                      CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
1342      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1343 *
1344 *                    Copy R to VT, zeroing out below it
1345 *
1346                      CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1347                      IF( N.GT.1 )
1348      $                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1349      $                               VT( 21 ), LDVT )
1350                      IE = ITAU
1351                      ITAUQ = IE + N
1352                      ITAUP = ITAUQ + N
1353                      IWORK = ITAUP + N
1354 *
1355 *                    Bidiagonalize R in VT
1356 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1357 *
1358                      CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1359      $                            WORK( ITAUQ ), WORK( ITAUP ),
1360      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1361 *
1362 *                    Multiply Q in U by left bidiagonalizing vectors
1363 *                    in VT
1364 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1365 *
1366                      CALL DORMBR( 'Q''R''N', M, N, N, VT, LDVT,
1367      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1368      $                            LWORK-IWORK+1, IERR )
1369 *
1370 *                    Generate right bidiagonalizing vectors in VT
1371 *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1372 *
1373                      CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1374      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1375                      IWORK = IE + N
1376 *
1377 *                    Perform bidiagonal QR iteration, computing left
1378 *                    singular vectors of A in U and computing right
1379 *                    singular vectors of A in VT
1380 *                    (Workspace: need BDSPAC)
1381 *
1382                      CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1383      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1384      $                            INFO )
1385 *
1386                   END IF
1387 *
1388                END IF
1389 *
1390             ELSE IF( WNTUA ) THEN
1391 *
1392                IF( WNTVN ) THEN
1393 *
1394 *                 Path 7 (M much larger than N, JOBU='A', JOBVT='N')
1395 *                 M left singular vectors to be computed in U and
1396 *                 no right singular vectors to be computed
1397 *
1398                   IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1399 *
1400 *                    Sufficient workspace for a fast algorithm
1401 *
1402                      IR = 1
1403                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
1404 *
1405 *                       WORK(IR) is LDA by N
1406 *
1407                         LDWRKR = LDA
1408                      ELSE
1409 *
1410 *                       WORK(IR) is N by N
1411 *
1412                         LDWRKR = N
1413                      END IF
1414                      ITAU = IR + LDWRKR*N
1415                      IWORK = ITAU + N
1416 *
1417 *                    Compute A=Q*R, copying result to U
1418 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1419 *
1420                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1421      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1422                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1423 *
1424 *                    Copy R to WORK(IR), zeroing out below it
1425 *
1426                      CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
1427      $                            LDWRKR )
1428                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1429      $                            WORK( IR+1 ), LDWRKR )
1430 *
1431 *                    Generate Q in U
1432 *                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1433 *
1434                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1435      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1436                      IE = ITAU
1437                      ITAUQ = IE + N
1438                      ITAUP = ITAUQ + N
1439                      IWORK = ITAUP + N
1440 *
1441 *                    Bidiagonalize R in WORK(IR)
1442 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1443 *
1444                      CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
1445      $                            WORK( IE ), WORK( ITAUQ ),
1446      $                            WORK( ITAUP ), WORK( IWORK ),
1447      $                            LWORK-IWORK+1, IERR )
1448 *
1449 *                    Generate left bidiagonalizing vectors in WORK(IR)
1450 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1451 *
1452                      CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
1453      $                            WORK( ITAUQ ), WORK( IWORK ),
1454      $                            LWORK-IWORK+1, IERR )
1455                      IWORK = IE + N
1456 *
1457 *                    Perform bidiagonal QR iteration, computing left
1458 *                    singular vectors of R in WORK(IR)
1459 *                    (Workspace: need N*N+BDSPAC)
1460 *
1461                      CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
1462      $                            1, WORK( IR ), LDWRKR, DUM, 1,
1463      $                            WORK( IWORK ), INFO )
1464 *
1465 *                    Multiply Q in U by left singular vectors of R in
1466 *                    WORK(IR), storing result in A
1467 *                    (Workspace: need N*N)
1468 *
1469                      CALL DGEMM( 'N''N', M, N, N, ONE, U, LDU,
1470      $                           WORK( IR ), LDWRKR, ZERO, A, LDA )
1471 *
1472 *                    Copy left singular vectors of A from A to U
1473 *
1474                      CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1475 *
1476                   ELSE
1477 *
1478 *                    Insufficient workspace for a fast algorithm
1479 *
1480                      ITAU = 1
1481                      IWORK = ITAU + N
1482 *
1483 *                    Compute A=Q*R, copying result to U
1484 *                    (Workspace: need 2*N, prefer N+N*NB)
1485 *
1486                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1487      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1488                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1489 *
1490 *                    Generate Q in U
1491 *                    (Workspace: need N+M, prefer N+M*NB)
1492 *
1493                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1494      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1495                      IE = ITAU
1496                      ITAUQ = IE + N
1497                      ITAUP = ITAUQ + N
1498                      IWORK = ITAUP + N
1499 *
1500 *                    Zero out below R in A
1501 *
1502                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 21 ),
1503      $                            LDA )
1504 *
1505 *                    Bidiagonalize R in A
1506 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1507 *
1508                      CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1509      $                            WORK( ITAUQ ), WORK( ITAUP ),
1510      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1511 *
1512 *                    Multiply Q in U by left bidiagonalizing vectors
1513 *                    in A
1514 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1515 *
1516                      CALL DORMBR( 'Q''R''N', M, N, N, A, LDA,
1517      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1518      $                            LWORK-IWORK+1, IERR )
1519                      IWORK = IE + N
1520 *
1521 *                    Perform bidiagonal QR iteration, computing left
1522 *                    singular vectors of A in U
1523 *                    (Workspace: need BDSPAC)
1524 *
1525                      CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
1526      $                            1, U, LDU, DUM, 1, WORK( IWORK ),
1527      $                            INFO )
1528 *
1529                   END IF
1530 *
1531                ELSE IF( WNTVO ) THEN
1532 *
1533 *                 Path 8 (M much larger than N, JOBU='A', JOBVT='O')
1534 *                 M left singular vectors to be computed in U and
1535 *                 N right singular vectors to be overwritten on A
1536 *
1537                   IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1538 *
1539 *                    Sufficient workspace for a fast algorithm
1540 *
1541                      IU = 1
1542                      IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
1543 *
1544 *                       WORK(IU) is LDA by N and WORK(IR) is LDA by N
1545 *
1546                         LDWRKU = LDA
1547                         IR = IU + LDWRKU*N
1548                         LDWRKR = LDA
1549                      ELSE IF( LWORK.GE.WRKBL+( LDA+N )*N ) THEN
1550 *
1551 *                       WORK(IU) is LDA by N and WORK(IR) is N by N
1552 *
1553                         LDWRKU = LDA
1554                         IR = IU + LDWRKU*N
1555                         LDWRKR = N
1556                      ELSE
1557 *
1558 *                       WORK(IU) is N by N and WORK(IR) is N by N
1559 *
1560                         LDWRKU = N
1561                         IR = IU + LDWRKU*N
1562                         LDWRKR = N
1563                      END IF
1564                      ITAU = IR + LDWRKR*N
1565                      IWORK = ITAU + N
1566 *
1567 *                    Compute A=Q*R, copying result to U
1568 *                    (Workspace: need 2*N*N+2*N, prefer 2*N*N+N+N*NB)
1569 *
1570                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1571      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1572                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1573 *
1574 *                    Generate Q in U
1575 *                    (Workspace: need 2*N*N+N+M, prefer 2*N*N+N+M*NB)
1576 *
1577                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1578      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1579 *
1580 *                    Copy R to WORK(IU), zeroing out below it
1581 *
1582                      CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1583      $                            LDWRKU )
1584                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1585      $                            WORK( IU+1 ), LDWRKU )
1586                      IE = ITAU
1587                      ITAUQ = IE + N
1588                      ITAUP = ITAUQ + N
1589                      IWORK = ITAUP + N
1590 *
1591 *                    Bidiagonalize R in WORK(IU), copying result to
1592 *                    WORK(IR)
1593 *                    (Workspace: need 2*N*N+4*N,
1594 *                                prefer 2*N*N+3*N+2*N*NB)
1595 *
1596                      CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1597      $                            WORK( IE ), WORK( ITAUQ ),
1598      $                            WORK( ITAUP ), WORK( IWORK ),
1599      $                            LWORK-IWORK+1, IERR )
1600                      CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
1601      $                            WORK( IR ), LDWRKR )
1602 *
1603 *                    Generate left bidiagonalizing vectors in WORK(IU)
1604 *                    (Workspace: need 2*N*N+4*N, prefer 2*N*N+3*N+N*NB)
1605 *
1606                      CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1607      $                            WORK( ITAUQ ), WORK( IWORK ),
1608      $                            LWORK-IWORK+1, IERR )
1609 *
1610 *                    Generate right bidiagonalizing vectors in WORK(IR)
1611 *                    (Workspace: need 2*N*N+4*N-1,
1612 *                                prefer 2*N*N+3*N+(N-1)*NB)
1613 *
1614                      CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
1615      $                            WORK( ITAUP ), WORK( IWORK ),
1616      $                            LWORK-IWORK+1, IERR )
1617                      IWORK = IE + N
1618 *
1619 *                    Perform bidiagonal QR iteration, computing left
1620 *                    singular vectors of R in WORK(IU) and computing
1621 *                    right singular vectors of R in WORK(IR)
1622 *                    (Workspace: need 2*N*N+BDSPAC)
1623 *
1624                      CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
1625      $                            WORK( IR ), LDWRKR, WORK( IU ),
1626      $                            LDWRKU, DUM, 1, WORK( IWORK ), INFO )
1627 *
1628 *                    Multiply Q in U by left singular vectors of R in
1629 *                    WORK(IU), storing result in A
1630 *                    (Workspace: need N*N)
1631 *
1632                      CALL DGEMM( 'N''N', M, N, N, ONE, U, LDU,
1633      $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1634 *
1635 *                    Copy left singular vectors of A from A to U
1636 *
1637                      CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1638 *
1639 *                    Copy right singular vectors of R from WORK(IR) to A
1640 *
1641                      CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
1642      $                            LDA )
1643 *
1644                   ELSE
1645 *
1646 *                    Insufficient workspace for a fast algorithm
1647 *
1648                      ITAU = 1
1649                      IWORK = ITAU + N
1650 *
1651 *                    Compute A=Q*R, copying result to U
1652 *                    (Workspace: need 2*N, prefer N+N*NB)
1653 *
1654                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1655      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1656                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1657 *
1658 *                    Generate Q in U
1659 *                    (Workspace: need N+M, prefer N+M*NB)
1660 *
1661                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1662      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1663                      IE = ITAU
1664                      ITAUQ = IE + N
1665                      ITAUP = ITAUQ + N
1666                      IWORK = ITAUP + N
1667 *
1668 *                    Zero out below R in A
1669 *
1670                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 21 ),
1671      $                            LDA )
1672 *
1673 *                    Bidiagonalize R in A
1674 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1675 *
1676                      CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
1677      $                            WORK( ITAUQ ), WORK( ITAUP ),
1678      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1679 *
1680 *                    Multiply Q in U by left bidiagonalizing vectors
1681 *                    in A
1682 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1683 *
1684                      CALL DORMBR( 'Q''R''N', M, N, N, A, LDA,
1685      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1686      $                            LWORK-IWORK+1, IERR )
1687 *
1688 *                    Generate right bidiagonalizing vectors in A
1689 *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1690 *
1691                      CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1692      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1693                      IWORK = IE + N
1694 *
1695 *                    Perform bidiagonal QR iteration, computing left
1696 *                    singular vectors of A in U and computing right
1697 *                    singular vectors of A in A
1698 *                    (Workspace: need BDSPAC)
1699 *
1700                      CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
1701      $                            LDA, U, LDU, DUM, 1, WORK( IWORK ),
1702      $                            INFO )
1703 *
1704                   END IF
1705 *
1706                ELSE IF( WNTVAS ) THEN
1707 *
1708 *                 Path 9 (M much larger than N, JOBU='A', JOBVT='S'
1709 *                         or 'A')
1710 *                 M left singular vectors to be computed in U and
1711 *                 N right singular vectors to be computed in VT
1712 *
1713                   IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
1714 *
1715 *                    Sufficient workspace for a fast algorithm
1716 *
1717                      IU = 1
1718                      IF( LWORK.GE.WRKBL+LDA*N ) THEN
1719 *
1720 *                       WORK(IU) is LDA by N
1721 *
1722                         LDWRKU = LDA
1723                      ELSE
1724 *
1725 *                       WORK(IU) is N by N
1726 *
1727                         LDWRKU = N
1728                      END IF
1729                      ITAU = IU + LDWRKU*N
1730                      IWORK = ITAU + N
1731 *
1732 *                    Compute A=Q*R, copying result to U
1733 *                    (Workspace: need N*N+2*N, prefer N*N+N+N*NB)
1734 *
1735                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1736      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1737                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1738 *
1739 *                    Generate Q in U
1740 *                    (Workspace: need N*N+N+M, prefer N*N+N+M*NB)
1741 *
1742                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1743      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1744 *
1745 *                    Copy R to WORK(IU), zeroing out below it
1746 *
1747                      CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
1748      $                            LDWRKU )
1749                      CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1750      $                            WORK( IU+1 ), LDWRKU )
1751                      IE = ITAU
1752                      ITAUQ = IE + N
1753                      ITAUP = ITAUQ + N
1754                      IWORK = ITAUP + N
1755 *
1756 *                    Bidiagonalize R in WORK(IU), copying result to VT
1757 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+2*N*NB)
1758 *
1759                      CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
1760      $                            WORK( IE ), WORK( ITAUQ ),
1761      $                            WORK( ITAUP ), WORK( IWORK ),
1762      $                            LWORK-IWORK+1, IERR )
1763                      CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
1764      $                            LDVT )
1765 *
1766 *                    Generate left bidiagonalizing vectors in WORK(IU)
1767 *                    (Workspace: need N*N+4*N, prefer N*N+3*N+N*NB)
1768 *
1769                      CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
1770      $                            WORK( ITAUQ ), WORK( IWORK ),
1771      $                            LWORK-IWORK+1, IERR )
1772 *
1773 *                    Generate right bidiagonalizing vectors in VT
1774 *                    (Workspace: need N*N+4*N-1,
1775 *                                prefer N*N+3*N+(N-1)*NB)
1776 *
1777                      CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1778      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1779                      IWORK = IE + N
1780 *
1781 *                    Perform bidiagonal QR iteration, computing left
1782 *                    singular vectors of R in WORK(IU) and computing
1783 *                    right singular vectors of R in VT
1784 *                    (Workspace: need N*N+BDSPAC)
1785 *
1786                      CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
1787      $                            LDVT, WORK( IU ), LDWRKU, DUM, 1,
1788      $                            WORK( IWORK ), INFO )
1789 *
1790 *                    Multiply Q in U by left singular vectors of R in
1791 *                    WORK(IU), storing result in A
1792 *                    (Workspace: need N*N)
1793 *
1794                      CALL DGEMM( 'N''N', M, N, N, ONE, U, LDU,
1795      $                           WORK( IU ), LDWRKU, ZERO, A, LDA )
1796 *
1797 *                    Copy left singular vectors of A from A to U
1798 *
1799                      CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
1800 *
1801                   ELSE
1802 *
1803 *                    Insufficient workspace for a fast algorithm
1804 *
1805                      ITAU = 1
1806                      IWORK = ITAU + N
1807 *
1808 *                    Compute A=Q*R, copying result to U
1809 *                    (Workspace: need 2*N, prefer N+N*NB)
1810 *
1811                      CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
1812      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1813                      CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1814 *
1815 *                    Generate Q in U
1816 *                    (Workspace: need N+M, prefer N+M*NB)
1817 *
1818                      CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
1819      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1820 *
1821 *                    Copy R from A to VT, zeroing out below it
1822 *
1823                      CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1824                      IF( N.GT.1 )
1825      $                  CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
1826      $                               VT( 21 ), LDVT )
1827                      IE = ITAU
1828                      ITAUQ = IE + N
1829                      ITAUP = ITAUQ + N
1830                      IWORK = ITAUP + N
1831 *
1832 *                    Bidiagonalize R in VT
1833 *                    (Workspace: need 4*N, prefer 3*N+2*N*NB)
1834 *
1835                      CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
1836      $                            WORK( ITAUQ ), WORK( ITAUP ),
1837      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1838 *
1839 *                    Multiply Q in U by left bidiagonalizing vectors
1840 *                    in VT
1841 *                    (Workspace: need 3*N+M, prefer 3*N+M*NB)
1842 *
1843                      CALL DORMBR( 'Q''R''N', M, N, N, VT, LDVT,
1844      $                            WORK( ITAUQ ), U, LDU, WORK( IWORK ),
1845      $                            LWORK-IWORK+1, IERR )
1846 *
1847 *                    Generate right bidiagonalizing vectors in VT
1848 *                    (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1849 *
1850                      CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1851      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
1852                      IWORK = IE + N
1853 *
1854 *                    Perform bidiagonal QR iteration, computing left
1855 *                    singular vectors of A in U and computing right
1856 *                    singular vectors of A in VT
1857 *                    (Workspace: need BDSPAC)
1858 *
1859                      CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
1860      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
1861      $                            INFO )
1862 *
1863                   END IF
1864 *
1865                END IF
1866 *
1867             END IF
1868 *
1869          ELSE
1870 *
1871 *           M .LT. MNTHR
1872 *
1873 *           Path 10 (M at least N, but not much larger)
1874 *           Reduce to bidiagonal form without QR decomposition
1875 *
1876             IE = 1
1877             ITAUQ = IE + N
1878             ITAUP = ITAUQ + N
1879             IWORK = ITAUP + N
1880 *
1881 *           Bidiagonalize A
1882 *           (Workspace: need 3*N+M, prefer 3*N+(M+N)*NB)
1883 *
1884             CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
1885      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
1886      $                   IERR )
1887             IF( WNTUAS ) THEN
1888 *
1889 *              If left singular vectors desired in U, copy result to U
1890 *              and generate left bidiagonalizing vectors in U
1891 *              (Workspace: need 3*N+NCU, prefer 3*N+NCU*NB)
1892 *
1893                CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
1894                IF( WNTUS )
1895      $            NCU = N
1896                IF( WNTUA )
1897      $            NCU = M
1898                CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
1899      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1900             END IF
1901             IF( WNTVAS ) THEN
1902 *
1903 *              If right singular vectors desired in VT, copy result to
1904 *              VT and generate right bidiagonalizing vectors in VT
1905 *              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1906 *
1907                CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
1908                CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
1909      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1910             END IF
1911             IF( WNTUO ) THEN
1912 *
1913 *              If left singular vectors desired in A, generate left
1914 *              bidiagonalizing vectors in A
1915 *              (Workspace: need 4*N, prefer 3*N+N*NB)
1916 *
1917                CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
1918      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1919             END IF
1920             IF( WNTVO ) THEN
1921 *
1922 *              If right singular vectors desired in A, generate right
1923 *              bidiagonalizing vectors in A
1924 *              (Workspace: need 4*N-1, prefer 3*N+(N-1)*NB)
1925 *
1926                CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
1927      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
1928             END IF
1929             IWORK = IE + N
1930             IF( WNTUAS .OR. WNTUO )
1931      $         NRU = M
1932             IF( WNTUN )
1933      $         NRU = 0
1934             IF( WNTVAS .OR. WNTVO )
1935      $         NCVT = N
1936             IF( WNTVN )
1937      $         NCVT = 0
1938             IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
1939 *
1940 *              Perform bidiagonal QR iteration, if desired, computing
1941 *              left singular vectors in U and computing right singular
1942 *              vectors in VT
1943 *              (Workspace: need BDSPAC)
1944 *
1945                CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1946      $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
1947             ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
1948 *
1949 *              Perform bidiagonal QR iteration, if desired, computing
1950 *              left singular vectors in U and computing right singular
1951 *              vectors in A
1952 *              (Workspace: need BDSPAC)
1953 *
1954                CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
1955      $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
1956             ELSE
1957 *
1958 *              Perform bidiagonal QR iteration, if desired, computing
1959 *              left singular vectors in A and computing right singular
1960 *              vectors in VT
1961 *              (Workspace: need BDSPAC)
1962 *
1963                CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
1964      $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
1965             END IF
1966 *
1967          END IF
1968 *
1969       ELSE
1970 *
1971 *        A has more columns than rows. If A has sufficiently more
1972 *        columns than rows, first reduce using the LQ decomposition (if
1973 *        sufficient workspace available)
1974 *
1975          IF( N.GE.MNTHR ) THEN
1976 *
1977             IF( WNTVN ) THEN
1978 *
1979 *              Path 1t(N much larger than M, JOBVT='N')
1980 *              No right singular vectors to be computed
1981 *
1982                ITAU = 1
1983                IWORK = ITAU + M
1984 *
1985 *              Compute A=L*Q
1986 *              (Workspace: need 2*M, prefer M+M*NB)
1987 *
1988                CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
1989      $                      LWORK-IWORK+1, IERR )
1990 *
1991 *              Zero out above L
1992 *
1993                CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 12 ), LDA )
1994                IE = 1
1995                ITAUQ = IE + M
1996                ITAUP = ITAUQ + M
1997                IWORK = ITAUP + M
1998 *
1999 *              Bidiagonalize L in A
2000 *              (Workspace: need 4*M, prefer 3*M+2*M*NB)
2001 *
2002                CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
2003      $                      WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
2004      $                      IERR )
2005                IF( WNTUO .OR. WNTUAS ) THEN
2006 *
2007 *                 If left singular vectors desired, generate Q
2008 *                 (Workspace: need 4*M, prefer 3*M+M*NB)
2009 *
2010                   CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2011      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2012                END IF
2013                IWORK = IE + M
2014                NRU = 0
2015                IF( WNTUO .OR. WNTUAS )
2016      $            NRU = M
2017 *
2018 *              Perform bidiagonal QR iteration, computing left singular
2019 *              vectors of A in A if desired
2020 *              (Workspace: need BDSPAC)
2021 *
2022                CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
2023      $                      LDA, DUM, 1, WORK( IWORK ), INFO )
2024 *
2025 *              If left singular vectors desired in U, copy them there
2026 *
2027                IF( WNTUAS )
2028      $            CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
2029 *
2030             ELSE IF( WNTVO .AND. WNTUN ) THEN
2031 *
2032 *              Path 2t(N much larger than M, JOBU='N', JOBVT='O')
2033 *              M right singular vectors to be overwritten on A and
2034 *              no left singular vectors to be computed
2035 *
2036                IF( LWORK.GE.M*M+MAX4*M, BDSPAC ) ) THEN
2037 *
2038 *                 Sufficient workspace for a fast algorithm
2039 *
2040                   IR = 1
2041                   IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2042 *
2043 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2044 *
2045                      LDWRKU = LDA
2046                      CHUNK = N
2047                      LDWRKR = LDA
2048                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2049 *
2050 *                    WORK(IU) is LDA by N and WORK(IR) is M by M
2051 *
2052                      LDWRKU = LDA
2053                      CHUNK = N
2054                      LDWRKR = M
2055                   ELSE
2056 *
2057 *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2058 *
2059                      LDWRKU = M
2060                      CHUNK = ( LWORK-M*M-M ) / M
2061                      LDWRKR = M
2062                   END IF
2063                   ITAU = IR + LDWRKR*M
2064                   IWORK = ITAU + M
2065 *
2066 *                 Compute A=L*Q
2067 *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2068 *
2069                   CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2070      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2071 *
2072 *                 Copy L to WORK(IR) and zero out above it
2073 *
2074                   CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
2075                   CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2076      $                         WORK( IR+LDWRKR ), LDWRKR )
2077 *
2078 *                 Generate Q in A
2079 *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2080 *
2081                   CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2082      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2083                   IE = ITAU
2084                   ITAUQ = IE + M
2085                   ITAUP = ITAUQ + M
2086                   IWORK = ITAUP + M
2087 *
2088 *                 Bidiagonalize L in WORK(IR)
2089 *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2090 *
2091                   CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
2092      $                         WORK( ITAUQ ), WORK( ITAUP ),
2093      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2094 *
2095 *                 Generate right vectors bidiagonalizing L
2096 *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2097 *
2098                   CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2099      $                         WORK( ITAUP ), WORK( IWORK ),
2100      $                         LWORK-IWORK+1, IERR )
2101                   IWORK = IE + M
2102 *
2103 *                 Perform bidiagonal QR iteration, computing right
2104 *                 singular vectors of L in WORK(IR)
2105 *                 (Workspace: need M*M+BDSPAC)
2106 *
2107                   CALL DBDSQR( 'U', M, M, 00, S, WORK( IE ),
2108      $                         WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2109      $                         WORK( IWORK ), INFO )
2110                   IU = IE + M
2111 *
2112 *                 Multiply right singular vectors of L in WORK(IR) by Q
2113 *                 in A, storing result in WORK(IU) and copying to A
2114 *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M)
2115 *
2116                   DO 30 I = 1, N, CHUNK
2117                      BLK = MIN( N-I+1, CHUNK )
2118                      CALL DGEMM( 'N''N', M, BLK, M, ONE, WORK( IR ),
2119      $                           LDWRKR, A( 1, I ), LDA, ZERO,
2120      $                           WORK( IU ), LDWRKU )
2121                      CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2122      $                            A( 1, I ), LDA )
2123    30             CONTINUE
2124 *
2125                ELSE
2126 *
2127 *                 Insufficient workspace for a fast algorithm
2128 *
2129                   IE = 1
2130                   ITAUQ = IE + M
2131                   ITAUP = ITAUQ + M
2132                   IWORK = ITAUP + M
2133 *
2134 *                 Bidiagonalize A
2135 *                 (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
2136 *
2137                   CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
2138      $                         WORK( ITAUQ ), WORK( ITAUP ),
2139      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2140 *
2141 *                 Generate right vectors bidiagonalizing A
2142 *                 (Workspace: need 4*M, prefer 3*M+M*NB)
2143 *
2144                   CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
2145      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2146                   IWORK = IE + M
2147 *
2148 *                 Perform bidiagonal QR iteration, computing right
2149 *                 singular vectors of A in A
2150 *                 (Workspace: need BDSPAC)
2151 *
2152                   CALL DBDSQR( 'L', M, N, 00, S, WORK( IE ), A, LDA,
2153      $                         DUM, 1, DUM, 1, WORK( IWORK ), INFO )
2154 *
2155                END IF
2156 *
2157             ELSE IF( WNTVO .AND. WNTUAS ) THEN
2158 *
2159 *              Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
2160 *              M right singular vectors to be overwritten on A and
2161 *              M left singular vectors to be computed in U
2162 *
2163                IF( LWORK.GE.M*M+MAX4*M, BDSPAC ) ) THEN
2164 *
2165 *                 Sufficient workspace for a fast algorithm
2166 *
2167                   IR = 1
2168                   IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+LDA*M ) THEN
2169 *
2170 *                    WORK(IU) is LDA by N and WORK(IR) is LDA by M
2171 *
2172                      LDWRKU = LDA
2173                      CHUNK = N
2174                      LDWRKR = LDA
2175                   ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N+M )+M*M ) THEN
2176 *
2177 *                    WORK(IU) is LDA by N and WORK(IR) is M by M
2178 *
2179                      LDWRKU = LDA
2180                      CHUNK = N
2181                      LDWRKR = M
2182                   ELSE
2183 *
2184 *                    WORK(IU) is M by CHUNK and WORK(IR) is M by M
2185 *
2186                      LDWRKU = M
2187                      CHUNK = ( LWORK-M*M-M ) / M
2188                      LDWRKR = M
2189                   END IF
2190                   ITAU = IR + LDWRKR*M
2191                   IWORK = ITAU + M
2192 *
2193 *                 Compute A=L*Q
2194 *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2195 *
2196                   CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2197      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2198 *
2199 *                 Copy L to U, zeroing about above it
2200 *
2201                   CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2202                   CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 12 ),
2203      $                         LDU )
2204 *
2205 *                 Generate Q in A
2206 *                 (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2207 *
2208                   CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2209      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2210                   IE = ITAU
2211                   ITAUQ = IE + M
2212                   ITAUP = ITAUQ + M
2213                   IWORK = ITAUP + M
2214 *
2215 *                 Bidiagonalize L in U, copying result to WORK(IR)
2216 *                 (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2217 *
2218                   CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2219      $                         WORK( ITAUQ ), WORK( ITAUP ),
2220      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2221                   CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
2222 *
2223 *                 Generate right vectors bidiagonalizing L in WORK(IR)
2224 *                 (Workspace: need M*M+4*M-1, prefer M*M+3*M+(M-1)*NB)
2225 *
2226                   CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2227      $                         WORK( ITAUP ), WORK( IWORK ),
2228      $                         LWORK-IWORK+1, IERR )
2229 *
2230 *                 Generate left vectors bidiagonalizing L in U
2231 *                 (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2232 *
2233                   CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2234      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2235                   IWORK = IE + M
2236 *
2237 *                 Perform bidiagonal QR iteration, computing left
2238 *                 singular vectors of L in U, and computing right
2239 *                 singular vectors of L in WORK(IR)
2240 *                 (Workspace: need M*M+BDSPAC)
2241 *
2242                   CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2243      $                         WORK( IR ), LDWRKR, U, LDU, DUM, 1,
2244      $                         WORK( IWORK ), INFO )
2245                   IU = IE + M
2246 *
2247 *                 Multiply right singular vectors of L in WORK(IR) by Q
2248 *                 in A, storing result in WORK(IU) and copying to A
2249 *                 (Workspace: need M*M+2*M, prefer M*M+M*N+M))
2250 *
2251                   DO 40 I = 1, N, CHUNK
2252                      BLK = MIN( N-I+1, CHUNK )
2253                      CALL DGEMM( 'N''N', M, BLK, M, ONE, WORK( IR ),
2254      $                           LDWRKR, A( 1, I ), LDA, ZERO,
2255      $                           WORK( IU ), LDWRKU )
2256                      CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
2257      $                            A( 1, I ), LDA )
2258    40             CONTINUE
2259 *
2260                ELSE
2261 *
2262 *                 Insufficient workspace for a fast algorithm
2263 *
2264                   ITAU = 1
2265                   IWORK = ITAU + M
2266 *
2267 *                 Compute A=L*Q
2268 *                 (Workspace: need 2*M, prefer M+M*NB)
2269 *
2270                   CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2271      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2272 *
2273 *                 Copy L to U, zeroing out above it
2274 *
2275                   CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2276                   CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 12 ),
2277      $                         LDU )
2278 *
2279 *                 Generate Q in A
2280 *                 (Workspace: need 2*M, prefer M+M*NB)
2281 *
2282                   CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2283      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2284                   IE = ITAU
2285                   ITAUQ = IE + M
2286                   ITAUP = ITAUQ + M
2287                   IWORK = ITAUP + M
2288 *
2289 *                 Bidiagonalize L in U
2290 *                 (Workspace: need 4*M, prefer 3*M+2*M*NB)
2291 *
2292                   CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2293      $                         WORK( ITAUQ ), WORK( ITAUP ),
2294      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2295 *
2296 *                 Multiply right vectors bidiagonalizing L by Q in A
2297 *                 (Workspace: need 3*M+N, prefer 3*M+N*NB)
2298 *
2299                   CALL DORMBR( 'P''L''T', M, N, M, U, LDU,
2300      $                         WORK( ITAUP ), A, LDA, WORK( IWORK ),
2301      $                         LWORK-IWORK+1, IERR )
2302 *
2303 *                 Generate left vectors bidiagonalizing L in U
2304 *                 (Workspace: need 4*M, prefer 3*M+M*NB)
2305 *
2306                   CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2307      $                         WORK( IWORK ), LWORK-IWORK+1, IERR )
2308                   IWORK = IE + M
2309 *
2310 *                 Perform bidiagonal QR iteration, computing left
2311 *                 singular vectors of A in U and computing right
2312 *                 singular vectors of A in A
2313 *                 (Workspace: need BDSPAC)
2314 *
2315                   CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
2316      $                         U, LDU, DUM, 1, WORK( IWORK ), INFO )
2317 *
2318                END IF
2319 *
2320             ELSE IF( WNTVS ) THEN
2321 *
2322                IF( WNTUN ) THEN
2323 *
2324 *                 Path 4t(N much larger than M, JOBU='N', JOBVT='S')
2325 *                 M right singular vectors to be computed in VT and
2326 *                 no left singular vectors to be computed
2327 *
2328                   IF( LWORK.GE.M*M+MAX4*M, BDSPAC ) ) THEN
2329 *
2330 *                    Sufficient workspace for a fast algorithm
2331 *
2332                      IR = 1
2333                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
2334 *
2335 *                       WORK(IR) is LDA by M
2336 *
2337                         LDWRKR = LDA
2338                      ELSE
2339 *
2340 *                       WORK(IR) is M by M
2341 *
2342                         LDWRKR = M
2343                      END IF
2344                      ITAU = IR + LDWRKR*M
2345                      IWORK = ITAU + M
2346 *
2347 *                    Compute A=L*Q
2348 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2349 *
2350                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2351      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2352 *
2353 *                    Copy L to WORK(IR), zeroing out above it
2354 *
2355                      CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2356      $                            LDWRKR )
2357                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2358      $                            WORK( IR+LDWRKR ), LDWRKR )
2359 *
2360 *                    Generate Q in A
2361 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2362 *
2363                      CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2364      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2365                      IE = ITAU
2366                      ITAUQ = IE + M
2367                      ITAUP = ITAUQ + M
2368                      IWORK = ITAUP + M
2369 *
2370 *                    Bidiagonalize L in WORK(IR)
2371 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2372 *
2373                      CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2374      $                            WORK( IE ), WORK( ITAUQ ),
2375      $                            WORK( ITAUP ), WORK( IWORK ),
2376      $                            LWORK-IWORK+1, IERR )
2377 *
2378 *                    Generate right vectors bidiagonalizing L in
2379 *                    WORK(IR)
2380 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
2381 *
2382                      CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2383      $                            WORK( ITAUP ), WORK( IWORK ),
2384      $                            LWORK-IWORK+1, IERR )
2385                      IWORK = IE + M
2386 *
2387 *                    Perform bidiagonal QR iteration, computing right
2388 *                    singular vectors of L in WORK(IR)
2389 *                    (Workspace: need M*M+BDSPAC)
2390 *
2391                      CALL DBDSQR( 'U', M, M, 00, S, WORK( IE ),
2392      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2393      $                            WORK( IWORK ), INFO )
2394 *
2395 *                    Multiply right singular vectors of L in WORK(IR) by
2396 *                    Q in A, storing result in VT
2397 *                    (Workspace: need M*M)
2398 *
2399                      CALL DGEMM( 'N''N', M, N, M, ONE, WORK( IR ),
2400      $                           LDWRKR, A, LDA, ZERO, VT, LDVT )
2401 *
2402                   ELSE
2403 *
2404 *                    Insufficient workspace for a fast algorithm
2405 *
2406                      ITAU = 1
2407                      IWORK = ITAU + M
2408 *
2409 *                    Compute A=L*Q
2410 *                    (Workspace: need 2*M, prefer M+M*NB)
2411 *
2412                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2413      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2414 *
2415 *                    Copy result to VT
2416 *
2417                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2418 *
2419 *                    Generate Q in VT
2420 *                    (Workspace: need 2*M, prefer M+M*NB)
2421 *
2422                      CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2423      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2424                      IE = ITAU
2425                      ITAUQ = IE + M
2426                      ITAUP = ITAUQ + M
2427                      IWORK = ITAUP + M
2428 *
2429 *                    Zero out above L in A
2430 *
2431                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 12 ),
2432      $                            LDA )
2433 *
2434 *                    Bidiagonalize L in A
2435 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2436 *
2437                      CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2438      $                            WORK( ITAUQ ), WORK( ITAUP ),
2439      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2440 *
2441 *                    Multiply right vectors bidiagonalizing L by Q in VT
2442 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2443 *
2444                      CALL DORMBR( 'P''L''T', M, N, M, A, LDA,
2445      $                            WORK( ITAUP ), VT, LDVT,
2446      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2447                      IWORK = IE + M
2448 *
2449 *                    Perform bidiagonal QR iteration, computing right
2450 *                    singular vectors of A in VT
2451 *                    (Workspace: need BDSPAC)
2452 *
2453                      CALL DBDSQR( 'U', M, N, 00, S, WORK( IE ), VT,
2454      $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2455      $                            INFO )
2456 *
2457                   END IF
2458 *
2459                ELSE IF( WNTUO ) THEN
2460 *
2461 *                 Path 5t(N much larger than M, JOBU='O', JOBVT='S')
2462 *                 M right singular vectors to be computed in VT and
2463 *                 M left singular vectors to be overwritten on A
2464 *
2465                   IF( LWORK.GE.2*M*M+MAX4*M, BDSPAC ) ) THEN
2466 *
2467 *                    Sufficient workspace for a fast algorithm
2468 *
2469                      IU = 1
2470                      IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2471 *
2472 *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2473 *
2474                         LDWRKU = LDA
2475                         IR = IU + LDWRKU*M
2476                         LDWRKR = LDA
2477                      ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2478 *
2479 *                       WORK(IU) is LDA by M and WORK(IR) is M by M
2480 *
2481                         LDWRKU = LDA
2482                         IR = IU + LDWRKU*M
2483                         LDWRKR = M
2484                      ELSE
2485 *
2486 *                       WORK(IU) is M by M and WORK(IR) is M by M
2487 *
2488                         LDWRKU = M
2489                         IR = IU + LDWRKU*M
2490                         LDWRKR = M
2491                      END IF
2492                      ITAU = IR + LDWRKR*M
2493                      IWORK = ITAU + M
2494 *
2495 *                    Compute A=L*Q
2496 *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2497 *
2498                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2499      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2500 *
2501 *                    Copy L to WORK(IU), zeroing out below it
2502 *
2503                      CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2504      $                            LDWRKU )
2505                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2506      $                            WORK( IU+LDWRKU ), LDWRKU )
2507 *
2508 *                    Generate Q in A
2509 *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2510 *
2511                      CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2512      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2513                      IE = ITAU
2514                      ITAUQ = IE + M
2515                      ITAUP = ITAUQ + M
2516                      IWORK = ITAUP + M
2517 *
2518 *                    Bidiagonalize L in WORK(IU), copying result to
2519 *                    WORK(IR)
2520 *                    (Workspace: need 2*M*M+4*M,
2521 *                                prefer 2*M*M+3*M+2*M*NB)
2522 *
2523                      CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2524      $                            WORK( IE ), WORK( ITAUQ ),
2525      $                            WORK( ITAUP ), WORK( IWORK ),
2526      $                            LWORK-IWORK+1, IERR )
2527                      CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2528      $                            WORK( IR ), LDWRKR )
2529 *
2530 *                    Generate right bidiagonalizing vectors in WORK(IU)
2531 *                    (Workspace: need 2*M*M+4*M-1,
2532 *                                prefer 2*M*M+3*M+(M-1)*NB)
2533 *
2534                      CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2535      $                            WORK( ITAUP ), WORK( IWORK ),
2536      $                            LWORK-IWORK+1, IERR )
2537 *
2538 *                    Generate left bidiagonalizing vectors in WORK(IR)
2539 *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
2540 *
2541                      CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
2542      $                            WORK( ITAUQ ), WORK( IWORK ),
2543      $                            LWORK-IWORK+1, IERR )
2544                      IWORK = IE + M
2545 *
2546 *                    Perform bidiagonal QR iteration, computing left
2547 *                    singular vectors of L in WORK(IR) and computing
2548 *                    right singular vectors of L in WORK(IU)
2549 *                    (Workspace: need 2*M*M+BDSPAC)
2550 *
2551                      CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2552      $                            WORK( IU ), LDWRKU, WORK( IR ),
2553      $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
2554 *
2555 *                    Multiply right singular vectors of L in WORK(IU) by
2556 *                    Q in A, storing result in VT
2557 *                    (Workspace: need M*M)
2558 *
2559                      CALL DGEMM( 'N''N', M, N, M, ONE, WORK( IU ),
2560      $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2561 *
2562 *                    Copy left singular vectors of L to A
2563 *                    (Workspace: need M*M)
2564 *
2565                      CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
2566      $                            LDA )
2567 *
2568                   ELSE
2569 *
2570 *                    Insufficient workspace for a fast algorithm
2571 *
2572                      ITAU = 1
2573                      IWORK = ITAU + M
2574 *
2575 *                    Compute A=L*Q, copying result to VT
2576 *                    (Workspace: need 2*M, prefer M+M*NB)
2577 *
2578                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2579      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2580                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2581 *
2582 *                    Generate Q in VT
2583 *                    (Workspace: need 2*M, prefer M+M*NB)
2584 *
2585                      CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2586      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2587                      IE = ITAU
2588                      ITAUQ = IE + M
2589                      ITAUP = ITAUQ + M
2590                      IWORK = ITAUP + M
2591 *
2592 *                    Zero out above L in A
2593 *
2594                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 12 ),
2595      $                            LDA )
2596 *
2597 *                    Bidiagonalize L in A
2598 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2599 *
2600                      CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2601      $                            WORK( ITAUQ ), WORK( ITAUP ),
2602      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2603 *
2604 *                    Multiply right vectors bidiagonalizing L by Q in VT
2605 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2606 *
2607                      CALL DORMBR( 'P''L''T', M, N, M, A, LDA,
2608      $                            WORK( ITAUP ), VT, LDVT,
2609      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2610 *
2611 *                    Generate left bidiagonalizing vectors of L in A
2612 *                    (Workspace: need 4*M, prefer 3*M+M*NB)
2613 *
2614                      CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
2615      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2616                      IWORK = IE + M
2617 *
2618 *                    Perform bidiagonal QR iteration, compute left
2619 *                    singular vectors of A in A and compute right
2620 *                    singular vectors of A in VT
2621 *                    (Workspace: need BDSPAC)
2622 *
2623                      CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2624      $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
2625      $                            INFO )
2626 *
2627                   END IF
2628 *
2629                ELSE IF( WNTUAS ) THEN
2630 *
2631 *                 Path 6t(N much larger than M, JOBU='S' or 'A',
2632 *                         JOBVT='S')
2633 *                 M right singular vectors to be computed in VT and
2634 *                 M left singular vectors to be computed in U
2635 *
2636                   IF( LWORK.GE.M*M+MAX4*M, BDSPAC ) ) THEN
2637 *
2638 *                    Sufficient workspace for a fast algorithm
2639 *
2640                      IU = 1
2641                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
2642 *
2643 *                       WORK(IU) is LDA by N
2644 *
2645                         LDWRKU = LDA
2646                      ELSE
2647 *
2648 *                       WORK(IU) is LDA by M
2649 *
2650                         LDWRKU = M
2651                      END IF
2652                      ITAU = IU + LDWRKU*M
2653                      IWORK = ITAU + M
2654 *
2655 *                    Compute A=L*Q
2656 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2657 *
2658                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2659      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2660 *
2661 *                    Copy L to WORK(IU), zeroing out above it
2662 *
2663                      CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2664      $                            LDWRKU )
2665                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2666      $                            WORK( IU+LDWRKU ), LDWRKU )
2667 *
2668 *                    Generate Q in A
2669 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2670 *
2671                      CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
2672      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2673                      IE = ITAU
2674                      ITAUQ = IE + M
2675                      ITAUP = ITAUQ + M
2676                      IWORK = ITAUP + M
2677 *
2678 *                    Bidiagonalize L in WORK(IU), copying result to U
2679 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2680 *
2681                      CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2682      $                            WORK( IE ), WORK( ITAUQ ),
2683      $                            WORK( ITAUP ), WORK( IWORK ),
2684      $                            LWORK-IWORK+1, IERR )
2685                      CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
2686      $                            LDU )
2687 *
2688 *                    Generate right bidiagonalizing vectors in WORK(IU)
2689 *                    (Workspace: need M*M+4*M-1,
2690 *                                prefer M*M+3*M+(M-1)*NB)
2691 *
2692                      CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
2693      $                            WORK( ITAUP ), WORK( IWORK ),
2694      $                            LWORK-IWORK+1, IERR )
2695 *
2696 *                    Generate left bidiagonalizing vectors in U
2697 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
2698 *
2699                      CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2700      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2701                      IWORK = IE + M
2702 *
2703 *                    Perform bidiagonal QR iteration, computing left
2704 *                    singular vectors of L in U and computing right
2705 *                    singular vectors of L in WORK(IU)
2706 *                    (Workspace: need M*M+BDSPAC)
2707 *
2708                      CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
2709      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
2710      $                            WORK( IWORK ), INFO )
2711 *
2712 *                    Multiply right singular vectors of L in WORK(IU) by
2713 *                    Q in A, storing result in VT
2714 *                    (Workspace: need M*M)
2715 *
2716                      CALL DGEMM( 'N''N', M, N, M, ONE, WORK( IU ),
2717      $                           LDWRKU, A, LDA, ZERO, VT, LDVT )
2718 *
2719                   ELSE
2720 *
2721 *                    Insufficient workspace for a fast algorithm
2722 *
2723                      ITAU = 1
2724                      IWORK = ITAU + M
2725 *
2726 *                    Compute A=L*Q, copying result to VT
2727 *                    (Workspace: need 2*M, prefer M+M*NB)
2728 *
2729                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2730      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2731                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2732 *
2733 *                    Generate Q in VT
2734 *                    (Workspace: need 2*M, prefer M+M*NB)
2735 *
2736                      CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
2737      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2738 *
2739 *                    Copy L to U, zeroing out above it
2740 *
2741                      CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
2742                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 12 ),
2743      $                            LDU )
2744                      IE = ITAU
2745                      ITAUQ = IE + M
2746                      ITAUP = ITAUQ + M
2747                      IWORK = ITAUP + M
2748 *
2749 *                    Bidiagonalize L in U
2750 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2751 *
2752                      CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
2753      $                            WORK( ITAUQ ), WORK( ITAUP ),
2754      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2755 *
2756 *                    Multiply right bidiagonalizing vectors in U by Q
2757 *                    in VT
2758 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2759 *
2760                      CALL DORMBR( 'P''L''T', M, N, M, U, LDU,
2761      $                            WORK( ITAUP ), VT, LDVT,
2762      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2763 *
2764 *                    Generate left bidiagonalizing vectors in U
2765 *                    (Workspace: need 4*M, prefer 3*M+M*NB)
2766 *
2767                      CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
2768      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2769                      IWORK = IE + M
2770 *
2771 *                    Perform bidiagonal QR iteration, computing left
2772 *                    singular vectors of A in U and computing right
2773 *                    singular vectors of A in VT
2774 *                    (Workspace: need BDSPAC)
2775 *
2776                      CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
2777      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
2778      $                            INFO )
2779 *
2780                   END IF
2781 *
2782                END IF
2783 *
2784             ELSE IF( WNTVA ) THEN
2785 *
2786                IF( WNTUN ) THEN
2787 *
2788 *                 Path 7t(N much larger than M, JOBU='N', JOBVT='A')
2789 *                 N right singular vectors to be computed in VT and
2790 *                 no left singular vectors to be computed
2791 *
2792                   IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2793 *
2794 *                    Sufficient workspace for a fast algorithm
2795 *
2796                      IR = 1
2797                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
2798 *
2799 *                       WORK(IR) is LDA by M
2800 *
2801                         LDWRKR = LDA
2802                      ELSE
2803 *
2804 *                       WORK(IR) is M by M
2805 *
2806                         LDWRKR = M
2807                      END IF
2808                      ITAU = IR + LDWRKR*M
2809                      IWORK = ITAU + M
2810 *
2811 *                    Compute A=L*Q, copying result to VT
2812 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
2813 *
2814                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2815      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2816                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2817 *
2818 *                    Copy L to WORK(IR), zeroing out above it
2819 *
2820                      CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
2821      $                            LDWRKR )
2822                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2823      $                            WORK( IR+LDWRKR ), LDWRKR )
2824 *
2825 *                    Generate Q in VT
2826 *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
2827 *
2828                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2829      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2830                      IE = ITAU
2831                      ITAUQ = IE + M
2832                      ITAUP = ITAUQ + M
2833                      IWORK = ITAUP + M
2834 *
2835 *                    Bidiagonalize L in WORK(IR)
2836 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
2837 *
2838                      CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
2839      $                            WORK( IE ), WORK( ITAUQ ),
2840      $                            WORK( ITAUP ), WORK( IWORK ),
2841      $                            LWORK-IWORK+1, IERR )
2842 *
2843 *                    Generate right bidiagonalizing vectors in WORK(IR)
2844 *                    (Workspace: need M*M+4*M-1,
2845 *                                prefer M*M+3*M+(M-1)*NB)
2846 *
2847                      CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
2848      $                            WORK( ITAUP ), WORK( IWORK ),
2849      $                            LWORK-IWORK+1, IERR )
2850                      IWORK = IE + M
2851 *
2852 *                    Perform bidiagonal QR iteration, computing right
2853 *                    singular vectors of L in WORK(IR)
2854 *                    (Workspace: need M*M+BDSPAC)
2855 *
2856                      CALL DBDSQR( 'U', M, M, 00, S, WORK( IE ),
2857      $                            WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
2858      $                            WORK( IWORK ), INFO )
2859 *
2860 *                    Multiply right singular vectors of L in WORK(IR) by
2861 *                    Q in VT, storing result in A
2862 *                    (Workspace: need M*M)
2863 *
2864                      CALL DGEMM( 'N''N', M, N, M, ONE, WORK( IR ),
2865      $                           LDWRKR, VT, LDVT, ZERO, A, LDA )
2866 *
2867 *                    Copy right singular vectors of A from A to VT
2868 *
2869                      CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
2870 *
2871                   ELSE
2872 *
2873 *                    Insufficient workspace for a fast algorithm
2874 *
2875                      ITAU = 1
2876                      IWORK = ITAU + M
2877 *
2878 *                    Compute A=L*Q, copying result to VT
2879 *                    (Workspace: need 2*M, prefer M+M*NB)
2880 *
2881                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2882      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2883                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2884 *
2885 *                    Generate Q in VT
2886 *                    (Workspace: need M+N, prefer M+N*NB)
2887 *
2888                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2889      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2890                      IE = ITAU
2891                      ITAUQ = IE + M
2892                      ITAUP = ITAUQ + M
2893                      IWORK = ITAUP + M
2894 *
2895 *                    Zero out above L in A
2896 *
2897                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 12 ),
2898      $                            LDA )
2899 *
2900 *                    Bidiagonalize L in A
2901 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
2902 *
2903                      CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
2904      $                            WORK( ITAUQ ), WORK( ITAUP ),
2905      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2906 *
2907 *                    Multiply right bidiagonalizing vectors in A by Q
2908 *                    in VT
2909 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
2910 *
2911                      CALL DORMBR( 'P''L''T', M, N, M, A, LDA,
2912      $                            WORK( ITAUP ), VT, LDVT,
2913      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2914                      IWORK = IE + M
2915 *
2916 *                    Perform bidiagonal QR iteration, computing right
2917 *                    singular vectors of A in VT
2918 *                    (Workspace: need BDSPAC)
2919 *
2920                      CALL DBDSQR( 'U', M, N, 00, S, WORK( IE ), VT,
2921      $                            LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
2922      $                            INFO )
2923 *
2924                   END IF
2925 *
2926                ELSE IF( WNTUO ) THEN
2927 *
2928 *                 Path 8t(N much larger than M, JOBU='O', JOBVT='A')
2929 *                 N right singular vectors to be computed in VT and
2930 *                 M left singular vectors to be overwritten on A
2931 *
2932                   IF( LWORK.GE.2*M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
2933 *
2934 *                    Sufficient workspace for a fast algorithm
2935 *
2936                      IU = 1
2937                      IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
2938 *
2939 *                       WORK(IU) is LDA by M and WORK(IR) is LDA by M
2940 *
2941                         LDWRKU = LDA
2942                         IR = IU + LDWRKU*M
2943                         LDWRKR = LDA
2944                      ELSE IF( LWORK.GE.WRKBL+( LDA+M )*M ) THEN
2945 *
2946 *                       WORK(IU) is LDA by M and WORK(IR) is M by M
2947 *
2948                         LDWRKU = LDA
2949                         IR = IU + LDWRKU*M
2950                         LDWRKR = M
2951                      ELSE
2952 *
2953 *                       WORK(IU) is M by M and WORK(IR) is M by M
2954 *
2955                         LDWRKU = M
2956                         IR = IU + LDWRKU*M
2957                         LDWRKR = M
2958                      END IF
2959                      ITAU = IR + LDWRKR*M
2960                      IWORK = ITAU + M
2961 *
2962 *                    Compute A=L*Q, copying result to VT
2963 *                    (Workspace: need 2*M*M+2*M, prefer 2*M*M+M+M*NB)
2964 *
2965                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
2966      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2967                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
2968 *
2969 *                    Generate Q in VT
2970 *                    (Workspace: need 2*M*M+M+N, prefer 2*M*M+M+N*NB)
2971 *
2972                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
2973      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
2974 *
2975 *                    Copy L to WORK(IU), zeroing out above it
2976 *
2977                      CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
2978      $                            LDWRKU )
2979                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
2980      $                            WORK( IU+LDWRKU ), LDWRKU )
2981                      IE = ITAU
2982                      ITAUQ = IE + M
2983                      ITAUP = ITAUQ + M
2984                      IWORK = ITAUP + M
2985 *
2986 *                    Bidiagonalize L in WORK(IU), copying result to
2987 *                    WORK(IR)
2988 *                    (Workspace: need 2*M*M+4*M,
2989 *                                prefer 2*M*M+3*M+2*M*NB)
2990 *
2991                      CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
2992      $                            WORK( IE ), WORK( ITAUQ ),
2993      $                            WORK( ITAUP ), WORK( IWORK ),
2994      $                            LWORK-IWORK+1, IERR )
2995                      CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
2996      $                            WORK( IR ), LDWRKR )
2997 *
2998 *                    Generate right bidiagonalizing vectors in WORK(IU)
2999 *                    (Workspace: need 2*M*M+4*M-1,
3000 *                                prefer 2*M*M+3*M+(M-1)*NB)
3001 *
3002                      CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3003      $                            WORK( ITAUP ), WORK( IWORK ),
3004      $                            LWORK-IWORK+1, IERR )
3005 *
3006 *                    Generate left bidiagonalizing vectors in WORK(IR)
3007 *                    (Workspace: need 2*M*M+4*M, prefer 2*M*M+3*M+M*NB)
3008 *
3009                      CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
3010      $                            WORK( ITAUQ ), WORK( IWORK ),
3011      $                            LWORK-IWORK+1, IERR )
3012                      IWORK = IE + M
3013 *
3014 *                    Perform bidiagonal QR iteration, computing left
3015 *                    singular vectors of L in WORK(IR) and computing
3016 *                    right singular vectors of L in WORK(IU)
3017 *                    (Workspace: need 2*M*M+BDSPAC)
3018 *
3019                      CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3020      $                            WORK( IU ), LDWRKU, WORK( IR ),
3021      $                            LDWRKR, DUM, 1, WORK( IWORK ), INFO )
3022 *
3023 *                    Multiply right singular vectors of L in WORK(IU) by
3024 *                    Q in VT, storing result in A
3025 *                    (Workspace: need M*M)
3026 *
3027                      CALL DGEMM( 'N''N', M, N, M, ONE, WORK( IU ),
3028      $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3029 *
3030 *                    Copy right singular vectors of A from A to VT
3031 *
3032                      CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3033 *
3034 *                    Copy left singular vectors of A from WORK(IR) to A
3035 *
3036                      CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
3037      $                            LDA )
3038 *
3039                   ELSE
3040 *
3041 *                    Insufficient workspace for a fast algorithm
3042 *
3043                      ITAU = 1
3044                      IWORK = ITAU + M
3045 *
3046 *                    Compute A=L*Q, copying result to VT
3047 *                    (Workspace: need 2*M, prefer M+M*NB)
3048 *
3049                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3050      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3051                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3052 *
3053 *                    Generate Q in VT
3054 *                    (Workspace: need M+N, prefer M+N*NB)
3055 *
3056                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3057      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3058                      IE = ITAU
3059                      ITAUQ = IE + M
3060                      ITAUP = ITAUQ + M
3061                      IWORK = ITAUP + M
3062 *
3063 *                    Zero out above L in A
3064 *
3065                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 12 ),
3066      $                            LDA )
3067 *
3068 *                    Bidiagonalize L in A
3069 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3070 *
3071                      CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
3072      $                            WORK( ITAUQ ), WORK( ITAUP ),
3073      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3074 *
3075 *                    Multiply right bidiagonalizing vectors in A by Q
3076 *                    in VT
3077 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3078 *
3079                      CALL DORMBR( 'P''L''T', M, N, M, A, LDA,
3080      $                            WORK( ITAUP ), VT, LDVT,
3081      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3082 *
3083 *                    Generate left bidiagonalizing vectors in A
3084 *                    (Workspace: need 4*M, prefer 3*M+M*NB)
3085 *
3086                      CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
3087      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3088                      IWORK = IE + M
3089 *
3090 *                    Perform bidiagonal QR iteration, computing left
3091 *                    singular vectors of A in A and computing right
3092 *                    singular vectors of A in VT
3093 *                    (Workspace: need BDSPAC)
3094 *
3095                      CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3096      $                            LDVT, A, LDA, DUM, 1, WORK( IWORK ),
3097      $                            INFO )
3098 *
3099                   END IF
3100 *
3101                ELSE IF( WNTUAS ) THEN
3102 *
3103 *                 Path 9t(N much larger than M, JOBU='S' or 'A',
3104 *                         JOBVT='A')
3105 *                 N right singular vectors to be computed in VT and
3106 *                 M left singular vectors to be computed in U
3107 *
3108                   IF( LWORK.GE.M*M+MAX( N+M, 4*M, BDSPAC ) ) THEN
3109 *
3110 *                    Sufficient workspace for a fast algorithm
3111 *
3112                      IU = 1
3113                      IF( LWORK.GE.WRKBL+LDA*M ) THEN
3114 *
3115 *                       WORK(IU) is LDA by M
3116 *
3117                         LDWRKU = LDA
3118                      ELSE
3119 *
3120 *                       WORK(IU) is M by M
3121 *
3122                         LDWRKU = M
3123                      END IF
3124                      ITAU = IU + LDWRKU*M
3125                      IWORK = ITAU + M
3126 *
3127 *                    Compute A=L*Q, copying result to VT
3128 *                    (Workspace: need M*M+2*M, prefer M*M+M+M*NB)
3129 *
3130                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3131      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3132                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3133 *
3134 *                    Generate Q in VT
3135 *                    (Workspace: need M*M+M+N, prefer M*M+M+N*NB)
3136 *
3137                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3138      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3139 *
3140 *                    Copy L to WORK(IU), zeroing out above it
3141 *
3142                      CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
3143      $                            LDWRKU )
3144                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
3145      $                            WORK( IU+LDWRKU ), LDWRKU )
3146                      IE = ITAU
3147                      ITAUQ = IE + M
3148                      ITAUP = ITAUQ + M
3149                      IWORK = ITAUP + M
3150 *
3151 *                    Bidiagonalize L in WORK(IU), copying result to U
3152 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+2*M*NB)
3153 *
3154                      CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
3155      $                            WORK( IE ), WORK( ITAUQ ),
3156      $                            WORK( ITAUP ), WORK( IWORK ),
3157      $                            LWORK-IWORK+1, IERR )
3158                      CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
3159      $                            LDU )
3160 *
3161 *                    Generate right bidiagonalizing vectors in WORK(IU)
3162 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+(M-1)*NB)
3163 *
3164                      CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
3165      $                            WORK( ITAUP ), WORK( IWORK ),
3166      $                            LWORK-IWORK+1, IERR )
3167 *
3168 *                    Generate left bidiagonalizing vectors in U
3169 *                    (Workspace: need M*M+4*M, prefer M*M+3*M+M*NB)
3170 *
3171                      CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3172      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3173                      IWORK = IE + M
3174 *
3175 *                    Perform bidiagonal QR iteration, computing left
3176 *                    singular vectors of L in U and computing right
3177 *                    singular vectors of L in WORK(IU)
3178 *                    (Workspace: need M*M+BDSPAC)
3179 *
3180                      CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
3181      $                            WORK( IU ), LDWRKU, U, LDU, DUM, 1,
3182      $                            WORK( IWORK ), INFO )
3183 *
3184 *                    Multiply right singular vectors of L in WORK(IU) by
3185 *                    Q in VT, storing result in A
3186 *                    (Workspace: need M*M)
3187 *
3188                      CALL DGEMM( 'N''N', M, N, M, ONE, WORK( IU ),
3189      $                           LDWRKU, VT, LDVT, ZERO, A, LDA )
3190 *
3191 *                    Copy right singular vectors of A from A to VT
3192 *
3193                      CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
3194 *
3195                   ELSE
3196 *
3197 *                    Insufficient workspace for a fast algorithm
3198 *
3199                      ITAU = 1
3200                      IWORK = ITAU + M
3201 *
3202 *                    Compute A=L*Q, copying result to VT
3203 *                    (Workspace: need 2*M, prefer M+M*NB)
3204 *
3205                      CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
3206      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3207                      CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3208 *
3209 *                    Generate Q in VT
3210 *                    (Workspace: need M+N, prefer M+N*NB)
3211 *
3212                      CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
3213      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3214 *
3215 *                    Copy L to U, zeroing out above it
3216 *
3217                      CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3218                      CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 12 ),
3219      $                            LDU )
3220                      IE = ITAU
3221                      ITAUQ = IE + M
3222                      ITAUP = ITAUQ + M
3223                      IWORK = ITAUP + M
3224 *
3225 *                    Bidiagonalize L in U
3226 *                    (Workspace: need 4*M, prefer 3*M+2*M*NB)
3227 *
3228                      CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
3229      $                            WORK( ITAUQ ), WORK( ITAUP ),
3230      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3231 *
3232 *                    Multiply right bidiagonalizing vectors in U by Q
3233 *                    in VT
3234 *                    (Workspace: need 3*M+N, prefer 3*M+N*NB)
3235 *
3236                      CALL DORMBR( 'P''L''T', M, N, M, U, LDU,
3237      $                            WORK( ITAUP ), VT, LDVT,
3238      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3239 *
3240 *                    Generate left bidiagonalizing vectors in U
3241 *                    (Workspace: need 4*M, prefer 3*M+M*NB)
3242 *
3243                      CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
3244      $                            WORK( IWORK ), LWORK-IWORK+1, IERR )
3245                      IWORK = IE + M
3246 *
3247 *                    Perform bidiagonal QR iteration, computing left
3248 *                    singular vectors of A in U and computing right
3249 *                    singular vectors of A in VT
3250 *                    (Workspace: need BDSPAC)
3251 *
3252                      CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
3253      $                            LDVT, U, LDU, DUM, 1, WORK( IWORK ),
3254      $                            INFO )
3255 *
3256                   END IF
3257 *
3258                END IF
3259 *
3260             END IF
3261 *
3262          ELSE
3263 *
3264 *           N .LT. MNTHR
3265 *
3266 *           Path 10t(N greater than M, but not much larger)
3267 *           Reduce to bidiagonal form without LQ decomposition
3268 *
3269             IE = 1
3270             ITAUQ = IE + M
3271             ITAUP = ITAUQ + M
3272             IWORK = ITAUP + M
3273 *
3274 *           Bidiagonalize A
3275 *           (Workspace: need 3*M+N, prefer 3*M+(M+N)*NB)
3276 *
3277             CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
3278      $                   WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
3279      $                   IERR )
3280             IF( WNTUAS ) THEN
3281 *
3282 *              If left singular vectors desired in U, copy result to U
3283 *              and generate left bidiagonalizing vectors in U
3284 *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3285 *
3286                CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
3287                CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
3288      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3289             END IF
3290             IF( WNTVAS ) THEN
3291 *
3292 *              If right singular vectors desired in VT, copy result to
3293 *              VT and generate right bidiagonalizing vectors in VT
3294 *              (Workspace: need 3*M+NRVT, prefer 3*M+NRVT*NB)
3295 *
3296                CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
3297                IF( WNTVA )
3298      $            NRVT = N
3299                IF( WNTVS )
3300      $            NRVT = M
3301                CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
3302      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3303             END IF
3304             IF( WNTUO ) THEN
3305 *
3306 *              If left singular vectors desired in A, generate left
3307 *              bidiagonalizing vectors in A
3308 *              (Workspace: need 4*M-1, prefer 3*M+(M-1)*NB)
3309 *
3310                CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
3311      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3312             END IF
3313             IF( WNTVO ) THEN
3314 *
3315 *              If right singular vectors desired in A, generate right
3316 *              bidiagonalizing vectors in A
3317 *              (Workspace: need 4*M, prefer 3*M+M*NB)
3318 *
3319                CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
3320      $                      WORK( IWORK ), LWORK-IWORK+1, IERR )
3321             END IF
3322             IWORK = IE + M
3323             IF( WNTUAS .OR. WNTUO )
3324      $         NRU = M
3325             IF( WNTUN )
3326      $         NRU = 0
3327             IF( WNTVAS .OR. WNTVO )
3328      $         NCVT = N
3329             IF( WNTVN )
3330      $         NCVT = 0
3331             IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
3332 *
3333 *              Perform bidiagonal QR iteration, if desired, computing
3334 *              left singular vectors in U and computing right singular
3335 *              vectors in VT
3336 *              (Workspace: need BDSPAC)
3337 *
3338                CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3339      $                      LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
3340             ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
3341 *
3342 *              Perform bidiagonal QR iteration, if desired, computing
3343 *              left singular vectors in U and computing right singular
3344 *              vectors in A
3345 *              (Workspace: need BDSPAC)
3346 *
3347                CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
3348      $                      U, LDU, DUM, 1, WORK( IWORK ), INFO )
3349             ELSE
3350 *
3351 *              Perform bidiagonal QR iteration, if desired, computing
3352 *              left singular vectors in A and computing right singular
3353 *              vectors in VT
3354 *              (Workspace: need BDSPAC)
3355 *
3356                CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
3357      $                      LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
3358             END IF
3359 *
3360          END IF
3361 *
3362       END IF
3363 *
3364 *     If DBDSQR failed to converge, copy unconverged superdiagonals
3365 *     to WORK( 2:MINMN )
3366 *
3367       IF( INFO.NE.0 ) THEN
3368          IF( IE.GT.2 ) THEN
3369             DO 50 I = 1, MINMN - 1
3370                WORK( I+1 ) = WORK( I+IE-1 )
3371    50       CONTINUE
3372          END IF
3373          IF( IE.LT.2 ) THEN
3374             DO 60 I = MINMN - 11-1
3375                WORK( I+1 ) = WORK( I+IE-1 )
3376    60       CONTINUE
3377          END IF
3378       END IF
3379 *
3380 *     Undo scaling if necessary
3381 *
3382       IF( ISCL.EQ.1 ) THEN
3383          IF( ANRM.GT.BIGNUM )
3384      $      CALL DLASCL( 'G'00, BIGNUM, ANRM, MINMN, 1, S, MINMN,
3385      $                   IERR )
3386          IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
3387      $      CALL DLASCL( 'G'00, BIGNUM, ANRM, MINMN-11, WORK( 2 ),
3388      $                   MINMN, IERR )
3389          IF( ANRM.LT.SMLNUM )
3390      $      CALL DLASCL( 'G'00, SMLNUM, ANRM, MINMN, 1, S, MINMN,
3391      $                   IERR )
3392          IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
3393      $      CALL DLASCL( 'G'00, SMLNUM, ANRM, MINMN-11, WORK( 2 ),
3394      $                   MINMN, IERR )
3395       END IF
3396 *
3397 *     Return optimal workspace in WORK(1)
3398 *
3399       WORK( 1 ) = MAXWRK
3400 *
3401       RETURN
3402 *
3403 *     End of DGESVD
3404 *
3405       END