1       SUBROUTINE DLALSD( UPLO, SMLSIZ, N, NRHS, D, E, B, LDB, RCOND,
  2      $                   RANK, WORK, IWORK, INFO )
  3 *
  4 *  -- LAPACK routine (version 3.2.2) --
  5 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  6 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  7 *     June 2010
  8 *
  9 *     .. Scalar Arguments ..
 10       CHARACTER          UPLO
 11       INTEGER            INFO, LDB, N, NRHS, RANK, SMLSIZ
 12       DOUBLE PRECISION   RCOND
 13 *     ..
 14 *     .. Array Arguments ..
 15       INTEGER            IWORK( * )
 16       DOUBLE PRECISION   B( LDB, * ), D( * ), E( * ), WORK( * )
 17 *     ..
 18 *
 19 *  Purpose
 20 *  =======
 21 *
 22 *  DLALSD uses the singular value decomposition of A to solve the least
 23 *  squares problem of finding X to minimize the Euclidean norm of each
 24 *  column of A*X-B, where A is N-by-N upper bidiagonal, and X and B
 25 *  are N-by-NRHS. The solution X overwrites B.
 26 *
 27 *  The singular values of A smaller than RCOND times the largest
 28 *  singular value are treated as zero in solving the least squares
 29 *  problem; in this case a minimum norm solution is returned.
 30 *  The actual singular values are returned in D in ascending order.
 31 *
 32 *  This code makes very mild assumptions about floating point
 33 *  arithmetic. It will work on machines with a guard digit in
 34 *  add/subtract, or on those binary machines without guard digits
 35 *  which subtract like the Cray XMP, Cray YMP, Cray C 90, or Cray 2.
 36 *  It could conceivably fail on hexadecimal or decimal machines
 37 *  without guard digits, but we know of none.
 38 *
 39 *  Arguments
 40 *  =========
 41 *
 42 *  UPLO   (input) CHARACTER*1
 43 *         = 'U': D and E define an upper bidiagonal matrix.
 44 *         = 'L': D and E define a  lower bidiagonal matrix.
 45 *
 46 *  SMLSIZ (input) INTEGER
 47 *         The maximum size of the subproblems at the bottom of the
 48 *         computation tree.
 49 *
 50 *  N      (input) INTEGER
 51 *         The dimension of the  bidiagonal matrix.  N >= 0.
 52 *
 53 *  NRHS   (input) INTEGER
 54 *         The number of columns of B. NRHS must be at least 1.
 55 *
 56 *  D      (input/output) DOUBLE PRECISION array, dimension (N)
 57 *         On entry D contains the main diagonal of the bidiagonal
 58 *         matrix. On exit, if INFO = 0, D contains its singular values.
 59 *
 60 *  E      (input/output) DOUBLE PRECISION array, dimension (N-1)
 61 *         Contains the super-diagonal entries of the bidiagonal matrix.
 62 *         On exit, E has been destroyed.
 63 *
 64 *  B      (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
 65 *         On input, B contains the right hand sides of the least
 66 *         squares problem. On output, B contains the solution X.
 67 *
 68 *  LDB    (input) INTEGER
 69 *         The leading dimension of B in the calling subprogram.
 70 *         LDB must be at least max(1,N).
 71 *
 72 *  RCOND  (input) DOUBLE PRECISION
 73 *         The singular values of A less than or equal to RCOND times
 74 *         the largest singular value are treated as zero in solving
 75 *         the least squares problem. If RCOND is negative,
 76 *         machine precision is used instead.
 77 *         For example, if diag(S)*X=B were the least squares problem,
 78 *         where diag(S) is a diagonal matrix of singular values, the
 79 *         solution would be X(i) = B(i) / S(i) if S(i) is greater than
 80 *         RCOND*max(S), and X(i) = 0 if S(i) is less than or equal to
 81 *         RCOND*max(S).
 82 *
 83 *  RANK   (output) INTEGER
 84 *         The number of singular values of A greater than RCOND times
 85 *         the largest singular value.
 86 *
 87 *  WORK   (workspace) DOUBLE PRECISION array, dimension at least
 88 *         (9*N + 2*N*SMLSIZ + 8*N*NLVL + N*NRHS + (SMLSIZ+1)**2),
 89 *         where NLVL = max(0, INT(log_2 (N/(SMLSIZ+1))) + 1).
 90 *
 91 *  IWORK  (workspace) INTEGER array, dimension at least
 92 *         (3*N*NLVL + 11*N)
 93 *
 94 *  INFO   (output) INTEGER
 95 *         = 0:  successful exit.
 96 *         < 0:  if INFO = -i, the i-th argument had an illegal value.
 97 *         > 0:  The algorithm failed to compute a singular value while
 98 *               working on the submatrix lying in rows and columns
 99 *               INFO/(N+1) through MOD(INFO,N+1).
100 *
101 *  Further Details
102 *  ===============
103 *
104 *  Based on contributions by
105 *     Ming Gu and Ren-Cang Li, Computer Science Division, University of
106 *       California at Berkeley, USA
107 *     Osni Marques, LBNL/NERSC, USA
108 *
109 *  =====================================================================
110 *
111 *     .. Parameters ..
112       DOUBLE PRECISION   ZERO, ONE, TWO
113       PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0 )
114 *     ..
115 *     .. Local Scalars ..
116       INTEGER            BX, BXST, C, DIFL, DIFR, GIVCOL, GIVNUM,
117      $                   GIVPTR, I, ICMPQ1, ICMPQ2, IWK, J, K, NLVL,
118      $                   NM1, NSIZE, NSUB, NWORK, PERM, POLES, S, SIZEI,
119      $                   SMLSZP, SQRE, ST, ST1, U, VT, Z
120       DOUBLE PRECISION   CS, EPS, ORGNRM, R, RCND, SN, TOL
121 *     ..
122 *     .. External Functions ..
123       INTEGER            IDAMAX
124       DOUBLE PRECISION   DLAMCH, DLANST
125       EXTERNAL           IDAMAX, DLAMCH, DLANST
126 *     ..
127 *     .. External Subroutines ..
128       EXTERNAL           DCOPY, DGEMM, DLACPY, DLALSA, DLARTG, DLASCL,
129      $                   DLASDA, DLASDQ, DLASET, DLASRT, DROT, XERBLA
130 *     ..
131 *     .. Intrinsic Functions ..
132       INTRINSIC          ABSDBLEINTLOGSIGN
133 *     ..
134 *     .. Executable Statements ..
135 *
136 *     Test the input parameters.
137 *
138       INFO = 0
139 *
140       IF( N.LT.0 ) THEN
141          INFO = -3
142       ELSE IF( NRHS.LT.1 ) THEN
143          INFO = -4
144       ELSE IF( ( LDB.LT.1 ) .OR. ( LDB.LT.N ) ) THEN
145          INFO = -8
146       END IF
147       IF( INFO.NE.0 ) THEN
148          CALL XERBLA( 'DLALSD'-INFO )
149          RETURN
150       END IF
151 *
152       EPS = DLAMCH( 'Epsilon' )
153 *
154 *     Set up the tolerance.
155 *
156       IF( ( RCOND.LE.ZERO ) .OR. ( RCOND.GE.ONE ) ) THEN
157          RCND = EPS
158       ELSE
159          RCND = RCOND
160       END IF
161 *
162       RANK = 0
163 *
164 *     Quick return if possible.
165 *
166       IF( N.EQ.0 ) THEN
167          RETURN
168       ELSE IF( N.EQ.1 ) THEN
169          IF( D( 1 ).EQ.ZERO ) THEN
170             CALL DLASET( 'A'1, NRHS, ZERO, ZERO, B, LDB )
171          ELSE
172             RANK = 1
173             CALL DLASCL( 'G'00, D( 1 ), ONE, 1, NRHS, B, LDB, INFO )
174             D( 1 ) = ABS( D( 1 ) )
175          END IF
176          RETURN
177       END IF
178 *
179 *     Rotate the matrix if it is lower bidiagonal.
180 *
181       IF( UPLO.EQ.'L' ) THEN
182          DO 10 I = 1, N - 1
183             CALL DLARTG( D( I ), E( I ), CS, SN, R )
184             D( I ) = R
185             E( I ) = SN*D( I+1 )
186             D( I+1 ) = CS*D( I+1 )
187             IF( NRHS.EQ.1 ) THEN
188                CALL DROT( 1, B( I, 1 ), 1, B( I+11 ), 1, CS, SN )
189             ELSE
190                WORK( I*2-1 ) = CS
191                WORK( I*2 ) = SN
192             END IF
193    10    CONTINUE
194          IF( NRHS.GT.1 ) THEN
195             DO 30 I = 1, NRHS
196                DO 20 J = 1, N - 1
197                   CS = WORK( J*2-1 )
198                   SN = WORK( J*2 )
199                   CALL DROT( 1, B( J, I ), 1, B( J+1, I ), 1, CS, SN )
200    20          CONTINUE
201    30       CONTINUE
202          END IF
203       END IF
204 *
205 *     Scale.
206 *
207       NM1 = N - 1
208       ORGNRM = DLANST( 'M', N, D, E )
209       IF( ORGNRM.EQ.ZERO ) THEN
210          CALL DLASET( 'A', N, NRHS, ZERO, ZERO, B, LDB )
211          RETURN
212       END IF
213 *
214       CALL DLASCL( 'G'00, ORGNRM, ONE, N, 1, D, N, INFO )
215       CALL DLASCL( 'G'00, ORGNRM, ONE, NM1, 1, E, NM1, INFO )
216 *
217 *     If N is smaller than the minimum divide size SMLSIZ, then solve
218 *     the problem with another solver.
219 *
220       IF( N.LE.SMLSIZ ) THEN
221          NWORK = 1 + N*N
222          CALL DLASET( 'A', N, N, ZERO, ONE, WORK, N )
223          CALL DLASDQ( 'U'0, N, N, 0, NRHS, D, E, WORK, N, WORK, N, B,
224      $                LDB, WORK( NWORK ), INFO )
225          IF( INFO.NE.0 ) THEN
226             RETURN
227          END IF
228          TOL = RCND*ABS( D( IDAMAX( N, D, 1 ) ) )
229          DO 40 I = 1, N
230             IF( D( I ).LE.TOL ) THEN
231                CALL DLASET( 'A'1, NRHS, ZERO, ZERO, B( I, 1 ), LDB )
232             ELSE
233                CALL DLASCL( 'G'00, D( I ), ONE, 1, NRHS, B( I, 1 ),
234      $                      LDB, INFO )
235                RANK = RANK + 1
236             END IF
237    40    CONTINUE
238          CALL DGEMM( 'T''N', N, NRHS, N, ONE, WORK, N, B, LDB, ZERO,
239      $               WORK( NWORK ), N )
240          CALL DLACPY( 'A', N, NRHS, WORK( NWORK ), N, B, LDB )
241 *
242 *        Unscale.
243 *
244          CALL DLASCL( 'G'00, ONE, ORGNRM, N, 1, D, N, INFO )
245          CALL DLASRT( 'D', N, D, INFO )
246          CALL DLASCL( 'G'00, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
247 *
248          RETURN
249       END IF
250 *
251 *     Book-keeping and setting up some constants.
252 *
253       NLVL = INTLOGDBLE( N ) / DBLE( SMLSIZ+1 ) ) / LOG( TWO ) ) + 1
254 *
255       SMLSZP = SMLSIZ + 1
256 *
257       U = 1
258       VT = 1 + SMLSIZ*N
259       DIFL = VT + SMLSZP*N
260       DIFR = DIFL + NLVL*N
261       Z = DIFR + NLVL*N*2
262       C = Z + NLVL*N
263       S = C + N
264       POLES = S + N
265       GIVNUM = POLES + 2*NLVL*N
266       BX = GIVNUM + 2*NLVL*N
267       NWORK = BX + N*NRHS
268 *
269       SIZEI = 1 + N
270       K = SIZEI + N
271       GIVPTR = K + N
272       PERM = GIVPTR + N
273       GIVCOL = PERM + NLVL*N
274       IWK = GIVCOL + NLVL*N*2
275 *
276       ST = 1
277       SQRE = 0
278       ICMPQ1 = 1
279       ICMPQ2 = 0
280       NSUB = 0
281 *
282       DO 50 I = 1, N
283          IFABS( D( I ) ).LT.EPS ) THEN
284             D( I ) = SIGN( EPS, D( I ) )
285          END IF
286    50 CONTINUE
287 *
288       DO 60 I = 1, NM1
289          IF( ( ABS( E( I ) ).LT.EPS ) .OR. ( I.EQ.NM1 ) ) THEN
290             NSUB = NSUB + 1
291             IWORK( NSUB ) = ST
292 *
293 *           Subproblem found. First determine its size and then
294 *           apply divide and conquer on it.
295 *
296             IF( I.LT.NM1 ) THEN
297 *
298 *              A subproblem with E(I) small for I < NM1.
299 *
300                NSIZE = I - ST + 1
301                IWORK( SIZEI+NSUB-1 ) = NSIZE
302             ELSE IFABS( E( I ) ).GE.EPS ) THEN
303 *
304 *              A subproblem with E(NM1) not too small but I = NM1.
305 *
306                NSIZE = N - ST + 1
307                IWORK( SIZEI+NSUB-1 ) = NSIZE
308             ELSE
309 *
310 *              A subproblem with E(NM1) small. This implies an
311 *              1-by-1 subproblem at D(N), which is not solved
312 *              explicitly.
313 *
314                NSIZE = I - ST + 1
315                IWORK( SIZEI+NSUB-1 ) = NSIZE
316                NSUB = NSUB + 1
317                IWORK( NSUB ) = N
318                IWORK( SIZEI+NSUB-1 ) = 1
319                CALL DCOPY( NRHS, B( N, 1 ), LDB, WORK( BX+NM1 ), N )
320             END IF
321             ST1 = ST - 1
322             IF( NSIZE.EQ.1 ) THEN
323 *
324 *              This is a 1-by-1 subproblem and is not solved
325 *              explicitly.
326 *
327                CALL DCOPY( NRHS, B( ST, 1 ), LDB, WORK( BX+ST1 ), N )
328             ELSE IF( NSIZE.LE.SMLSIZ ) THEN
329 *
330 *              This is a small subproblem and is solved by DLASDQ.
331 *
332                CALL DLASET( 'A', NSIZE, NSIZE, ZERO, ONE,
333      $                      WORK( VT+ST1 ), N )
334                CALL DLASDQ( 'U'0, NSIZE, NSIZE, 0, NRHS, D( ST ),
335      $                      E( ST ), WORK( VT+ST1 ), N, WORK( NWORK ),
336      $                      N, B( ST, 1 ), LDB, WORK( NWORK ), INFO )
337                IF( INFO.NE.0 ) THEN
338                   RETURN
339                END IF
340                CALL DLACPY( 'A', NSIZE, NRHS, B( ST, 1 ), LDB,
341      $                      WORK( BX+ST1 ), N )
342             ELSE
343 *
344 *              A large problem. Solve it using divide and conquer.
345 *
346                CALL DLASDA( ICMPQ1, SMLSIZ, NSIZE, SQRE, D( ST ),
347      $                      E( ST ), WORK( U+ST1 ), N, WORK( VT+ST1 ),
348      $                      IWORK( K+ST1 ), WORK( DIFL+ST1 ),
349      $                      WORK( DIFR+ST1 ), WORK( Z+ST1 ),
350      $                      WORK( POLES+ST1 ), IWORK( GIVPTR+ST1 ),
351      $                      IWORK( GIVCOL+ST1 ), N, IWORK( PERM+ST1 ),
352      $                      WORK( GIVNUM+ST1 ), WORK( C+ST1 ),
353      $                      WORK( S+ST1 ), WORK( NWORK ), IWORK( IWK ),
354      $                      INFO )
355                IF( INFO.NE.0 ) THEN
356                   RETURN
357                END IF
358                BXST = BX + ST1
359                CALL DLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, B( ST, 1 ),
360      $                      LDB, WORK( BXST ), N, WORK( U+ST1 ), N,
361      $                      WORK( VT+ST1 ), IWORK( K+ST1 ),
362      $                      WORK( DIFL+ST1 ), WORK( DIFR+ST1 ),
363      $                      WORK( Z+ST1 ), WORK( POLES+ST1 ),
364      $                      IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
365      $                      IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ),
366      $                      WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ),
367      $                      IWORK( IWK ), INFO )
368                IF( INFO.NE.0 ) THEN
369                   RETURN
370                END IF
371             END IF
372             ST = I + 1
373          END IF
374    60 CONTINUE
375 *
376 *     Apply the singular values and treat the tiny ones as zero.
377 *
378       TOL = RCND*ABS( D( IDAMAX( N, D, 1 ) ) )
379 *
380       DO 70 I = 1, N
381 *
382 *        Some of the elements in D can be negative because 1-by-1
383 *        subproblems were not solved explicitly.
384 *
385          IFABS( D( I ) ).LE.TOL ) THEN
386             CALL DLASET( 'A'1, NRHS, ZERO, ZERO, WORK( BX+I-1 ), N )
387          ELSE
388             RANK = RANK + 1
389             CALL DLASCL( 'G'00, D( I ), ONE, 1, NRHS,
390      $                   WORK( BX+I-1 ), N, INFO )
391          END IF
392          D( I ) = ABS( D( I ) )
393    70 CONTINUE
394 *
395 *     Now apply back the right singular vectors.
396 *
397       ICMPQ2 = 1
398       DO 80 I = 1, NSUB
399          ST = IWORK( I )
400          ST1 = ST - 1
401          NSIZE = IWORK( SIZEI+I-1 )
402          BXST = BX + ST1
403          IF( NSIZE.EQ.1 ) THEN
404             CALL DCOPY( NRHS, WORK( BXST ), N, B( ST, 1 ), LDB )
405          ELSE IF( NSIZE.LE.SMLSIZ ) THEN
406             CALL DGEMM( 'T''N', NSIZE, NRHS, NSIZE, ONE,
407      $                  WORK( VT+ST1 ), N, WORK( BXST ), N, ZERO,
408      $                  B( ST, 1 ), LDB )
409          ELSE
410             CALL DLALSA( ICMPQ2, SMLSIZ, NSIZE, NRHS, WORK( BXST ), N,
411      $                   B( ST, 1 ), LDB, WORK( U+ST1 ), N,
412      $                   WORK( VT+ST1 ), IWORK( K+ST1 ),
413      $                   WORK( DIFL+ST1 ), WORK( DIFR+ST1 ),
414      $                   WORK( Z+ST1 ), WORK( POLES+ST1 ),
415      $                   IWORK( GIVPTR+ST1 ), IWORK( GIVCOL+ST1 ), N,
416      $                   IWORK( PERM+ST1 ), WORK( GIVNUM+ST1 ),
417      $                   WORK( C+ST1 ), WORK( S+ST1 ), WORK( NWORK ),
418      $                   IWORK( IWK ), INFO )
419             IF( INFO.NE.0 ) THEN
420                RETURN
421             END IF
422          END IF
423    80 CONTINUE
424 *
425 *     Unscale and sort the singular values.
426 *
427       CALL DLASCL( 'G'00, ONE, ORGNRM, N, 1, D, N, INFO )
428       CALL DLASRT( 'D', N, D, INFO )
429       CALL DLASCL( 'G'00, ORGNRM, ONE, N, NRHS, B, LDB, INFO )
430 *
431       RETURN
432 *
433 *     End of DLALSD
434 *
435       END