1       SUBROUTINE DGELSY( M, N, NRHS, A, LDA, B, LDB, JPVT, RCOND, RANK,
  2      $                   WORK, LWORK, INFO )
  3 *
  4 *  -- LAPACK driver routine (version 3.3.1) --
  5 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  6 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  7 *  -- April 2011                                                      --
  8 *
  9 *     .. Scalar Arguments ..
 10       INTEGER            INFO, LDA, LDB, LWORK, M, N, NRHS, RANK
 11       DOUBLE PRECISION   RCOND
 12 *     ..
 13 *     .. Array Arguments ..
 14       INTEGER            JPVT( * )
 15       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( * )
 16 *     ..
 17 *
 18 *  Purpose
 19 *  =======
 20 *
 21 *  DGELSY computes the minimum-norm solution to a real linear least
 22 *  squares problem:
 23 *      minimize || A * X - B ||
 24 *  using a complete orthogonal factorization of A.  A is an M-by-N
 25 *  matrix which may be rank-deficient.
 26 *
 27 *  Several right hand side vectors b and solution vectors x can be
 28 *  handled in a single call; they are stored as the columns of the
 29 *  M-by-NRHS right hand side matrix B and the N-by-NRHS solution
 30 *  matrix X.
 31 *
 32 *  The routine first computes a QR factorization with column pivoting:
 33 *      A * P = Q * [ R11 R12 ]
 34 *                  [  0  R22 ]
 35 *  with R11 defined as the largest leading submatrix whose estimated
 36 *  condition number is less than 1/RCOND.  The order of R11, RANK,
 37 *  is the effective rank of A.
 38 *
 39 *  Then, R22 is considered to be negligible, and R12 is annihilated
 40 *  by orthogonal transformations from the right, arriving at the
 41 *  complete orthogonal factorization:
 42 *     A * P = Q * [ T11 0 ] * Z
 43 *                 [  0  0 ]
 44 *  The minimum-norm solution is then
 45 *     X = P * Z**T [ inv(T11)*Q1**T*B ]
 46 *                  [        0         ]
 47 *  where Q1 consists of the first RANK columns of Q.
 48 *
 49 *  This routine is basically identical to the original xGELSX except
 50 *  three differences:
 51 *    o The call to the subroutine xGEQPF has been substituted by the
 52 *      the call to the subroutine xGEQP3. This subroutine is a Blas-3
 53 *      version of the QR factorization with column pivoting.
 54 *    o Matrix B (the right hand side) is updated with Blas-3.
 55 *    o The permutation of matrix B (the right hand side) is faster and
 56 *      more simple.
 57 *
 58 *  Arguments
 59 *  =========
 60 *
 61 *  M       (input) INTEGER
 62 *          The number of rows of the matrix A.  M >= 0.
 63 *
 64 *  N       (input) INTEGER
 65 *          The number of columns of the matrix A.  N >= 0.
 66 *
 67 *  NRHS    (input) INTEGER
 68 *          The number of right hand sides, i.e., the number of
 69 *          columns of matrices B and X. NRHS >= 0.
 70 *
 71 *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
 72 *          On entry, the M-by-N matrix A.
 73 *          On exit, A has been overwritten by details of its
 74 *          complete orthogonal factorization.
 75 *
 76 *  LDA     (input) INTEGER
 77 *          The leading dimension of the array A.  LDA >= max(1,M).
 78 *
 79 *  B       (input/output) DOUBLE PRECISION array, dimension (LDB,NRHS)
 80 *          On entry, the M-by-NRHS right hand side matrix B.
 81 *          On exit, the N-by-NRHS solution matrix X.
 82 *
 83 *  LDB     (input) INTEGER
 84 *          The leading dimension of the array B. LDB >= max(1,M,N).
 85 *
 86 *  JPVT    (input/output) INTEGER array, dimension (N)
 87 *          On entry, if JPVT(i) .ne. 0, the i-th column of A is permuted
 88 *          to the front of AP, otherwise column i is a free column.
 89 *          On exit, if JPVT(i) = k, then the i-th column of AP
 90 *          was the k-th column of A.
 91 *
 92 *  RCOND   (input) DOUBLE PRECISION
 93 *          RCOND is used to determine the effective rank of A, which
 94 *          is defined as the order of the largest leading triangular
 95 *          submatrix R11 in the QR factorization with pivoting of A,
 96 *          whose estimated condition number < 1/RCOND.
 97 *
 98 *  RANK    (output) INTEGER
 99 *          The effective rank of A, i.e., the order of the submatrix
100 *          R11.  This is the same as the order of the submatrix T11
101 *          in the complete orthogonal factorization of A.
102 *
103 *  WORK    (workspace/output) DOUBLE PRECISION array, dimension (MAX(1,LWORK))
104 *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
105 *
106 *  LWORK   (input) INTEGER
107 *          The dimension of the array WORK.
108 *          The unblocked strategy requires that:
109 *             LWORK >= MAX( MN+3*N+1, 2*MN+NRHS ),
110 *          where MN = min( M, N ).
111 *          The block algorithm requires that:
112 *             LWORK >= MAX( MN+2*N+NB*(N+1), 2*MN+NB*NRHS ),
113 *          where NB is an upper bound on the blocksize returned
114 *          by ILAENV for the routines DGEQP3, DTZRZF, STZRQF, DORMQR,
115 *          and DORMRZ.
116 *
117 *          If LWORK = -1, then a workspace query is assumed; the routine
118 *          only calculates the optimal size of the WORK array, returns
119 *          this value as the first entry of the WORK array, and no error
120 *          message related to LWORK is issued by XERBLA.
121 *
122 *  INFO    (output) INTEGER
123 *          = 0: successful exit
124 *          < 0: If INFO = -i, the i-th argument had an illegal value.
125 *
126 *  Further Details
127 *  ===============
128 *
129 *  Based on contributions by
130 *    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
131 *    E. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
132 *    G. Quintana-Orti, Depto. de Informatica, Universidad Jaime I, Spain
133 *
134 *  =====================================================================
135 *
136 *     .. Parameters ..
137       INTEGER            IMAX, IMIN
138       PARAMETER          ( IMAX = 1, IMIN = 2 )
139       DOUBLE PRECISION   ZERO, ONE
140       PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0 )
141 *     ..
142 *     .. Local Scalars ..
143       LOGICAL            LQUERY
144       INTEGER            I, IASCL, IBSCL, ISMAX, ISMIN, J, LWKMIN,
145      $                   LWKOPT, MN, NB, NB1, NB2, NB3, NB4
146       DOUBLE PRECISION   ANRM, BIGNUM, BNRM, C1, C2, S1, S2, SMAX,
147      $                   SMAXPR, SMIN, SMINPR, SMLNUM, WSIZE
148 *     ..
149 *     .. External Functions ..
150       INTEGER            ILAENV
151       DOUBLE PRECISION   DLAMCH, DLANGE
152       EXTERNAL           ILAENV, DLAMCH, DLANGE
153 *     ..
154 *     .. External Subroutines ..
155       EXTERNAL           DCOPY, DGEQP3, DLABAD, DLAIC1, DLASCL, DLASET,
156      $                   DORMQR, DORMRZ, DTRSM, DTZRZF, XERBLA
157 *     ..
158 *     .. Intrinsic Functions ..
159       INTRINSIC          ABSMAXMIN
160 *     ..
161 *     .. Executable Statements ..
162 *
163       MN = MIN( M, N )
164       ISMIN = MN + 1
165       ISMAX = 2*MN + 1
166 *
167 *     Test the input arguments.
168 *
169       INFO = 0
170       LQUERY = ( LWORK.EQ.-1 )
171       IF( M.LT.0 ) THEN
172          INFO = -1
173       ELSE IF( N.LT.0 ) THEN
174          INFO = -2
175       ELSE IF( NRHS.LT.0 ) THEN
176          INFO = -3
177       ELSE IF( LDA.LT.MAX1, M ) ) THEN
178          INFO = -5
179       ELSE IF( LDB.LT.MAX1, M, N ) ) THEN
180          INFO = -7
181       END IF
182 *
183 *     Figure out optimal block size
184 *
185       IF( INFO.EQ.0 ) THEN
186          IF( MN.EQ.0 .OR. NRHS.EQ.0 ) THEN
187             LWKMIN = 1
188             LWKOPT = 1
189          ELSE
190             NB1 = ILAENV( 1'DGEQRF'' ', M, N, -1-1 )
191             NB2 = ILAENV( 1'DGERQF'' ', M, N, -1-1 )
192             NB3 = ILAENV( 1'DORMQR'' ', M, N, NRHS, -1 )
193             NB4 = ILAENV( 1'DORMRQ'' ', M, N, NRHS, -1 )
194             NB = MAX( NB1, NB2, NB3, NB4 )
195             LWKMIN = MN + MAX2*MN, N + 1, MN + NRHS )
196             LWKOPT = MAX( LWKMIN,
197      $                    MN + 2*+ NB*( N + 1 ), 2*MN + NB*NRHS )
198          END IF
199          WORK( 1 ) = LWKOPT
200 *
201          IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
202             INFO = -12
203          END IF
204       END IF
205 *
206       IF( INFO.NE.0 ) THEN
207          CALL XERBLA( 'DGELSY'-INFO )
208          RETURN
209       ELSE IF( LQUERY ) THEN
210          RETURN
211       END IF
212 *
213 *     Quick return if possible
214 *
215       IF( MN.EQ.0 .OR. NRHS.EQ.0 ) THEN
216          RANK = 0
217          RETURN
218       END IF
219 *
220 *     Get machine parameters
221 *
222       SMLNUM = DLAMCH( 'S' ) / DLAMCH( 'P' )
223       BIGNUM = ONE / SMLNUM
224       CALL DLABAD( SMLNUM, BIGNUM )
225 *
226 *     Scale A, B if max entries outside range [SMLNUM,BIGNUM]
227 *
228       ANRM = DLANGE( 'M', M, N, A, LDA, WORK )
229       IASCL = 0
230       IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
231 *
232 *        Scale matrix norm up to SMLNUM
233 *
234          CALL DLASCL( 'G'00, ANRM, SMLNUM, M, N, A, LDA, INFO )
235          IASCL = 1
236       ELSE IF( ANRM.GT.BIGNUM ) THEN
237 *
238 *        Scale matrix norm down to BIGNUM
239 *
240          CALL DLASCL( 'G'00, ANRM, BIGNUM, M, N, A, LDA, INFO )
241          IASCL = 2
242       ELSE IF( ANRM.EQ.ZERO ) THEN
243 *
244 *        Matrix all zero. Return zero solution.
245 *
246          CALL DLASET( 'F'MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
247          RANK = 0
248          GO TO 70
249       END IF
250 *
251       BNRM = DLANGE( 'M', M, NRHS, B, LDB, WORK )
252       IBSCL = 0
253       IF( BNRM.GT.ZERO .AND. BNRM.LT.SMLNUM ) THEN
254 *
255 *        Scale matrix norm up to SMLNUM
256 *
257          CALL DLASCL( 'G'00, BNRM, SMLNUM, M, NRHS, B, LDB, INFO )
258          IBSCL = 1
259       ELSE IF( BNRM.GT.BIGNUM ) THEN
260 *
261 *        Scale matrix norm down to BIGNUM
262 *
263          CALL DLASCL( 'G'00, BNRM, BIGNUM, M, NRHS, B, LDB, INFO )
264          IBSCL = 2
265       END IF
266 *
267 *     Compute QR factorization with column pivoting of A:
268 *        A * P = Q * R
269 *
270       CALL DGEQP3( M, N, A, LDA, JPVT, WORK( 1 ), WORK( MN+1 ),
271      $             LWORK-MN, INFO )
272       WSIZE = MN + WORK( MN+1 )
273 *
274 *     workspace: MN+2*N+NB*(N+1).
275 *     Details of Householder rotations stored in WORK(1:MN).
276 *
277 *     Determine RANK using incremental condition estimation
278 *
279       WORK( ISMIN ) = ONE
280       WORK( ISMAX ) = ONE
281       SMAX = ABS( A( 11 ) )
282       SMIN = SMAX
283       IFABS( A( 11 ) ).EQ.ZERO ) THEN
284          RANK = 0
285          CALL DLASET( 'F'MAX( M, N ), NRHS, ZERO, ZERO, B, LDB )
286          GO TO 70
287       ELSE
288          RANK = 1
289       END IF
290 *
291    10 CONTINUE
292       IF( RANK.LT.MN ) THEN
293          I = RANK + 1
294          CALL DLAIC1( IMIN, RANK, WORK( ISMIN ), SMIN, A( 1, I ),
295      $                A( I, I ), SMINPR, S1, C1 )
296          CALL DLAIC1( IMAX, RANK, WORK( ISMAX ), SMAX, A( 1, I ),
297      $                A( I, I ), SMAXPR, S2, C2 )
298 *
299          IF( SMAXPR*RCOND.LE.SMINPR ) THEN
300             DO 20 I = 1, RANK
301                WORK( ISMIN+I-1 ) = S1*WORK( ISMIN+I-1 )
302                WORK( ISMAX+I-1 ) = S2*WORK( ISMAX+I-1 )
303    20       CONTINUE
304             WORK( ISMIN+RANK ) = C1
305             WORK( ISMAX+RANK ) = C2
306             SMIN = SMINPR
307             SMAX = SMAXPR
308             RANK = RANK + 1
309             GO TO 10
310          END IF
311       END IF
312 *
313 *     workspace: 3*MN.
314 *
315 *     Logically partition R = [ R11 R12 ]
316 *                             [  0  R22 ]
317 *     where R11 = R(1:RANK,1:RANK)
318 *
319 *     [R11,R12] = [ T11, 0 ] * Y
320 *
321       IF( RANK.LT.N )
322      $   CALL DTZRZF( RANK, N, A, LDA, WORK( MN+1 ), WORK( 2*MN+1 ),
323      $                LWORK-2*MN, INFO )
324 *
325 *     workspace: 2*MN.
326 *     Details of Householder rotations stored in WORK(MN+1:2*MN)
327 *
328 *     B(1:M,1:NRHS) := Q**T * B(1:M,1:NRHS)
329 *
330       CALL DORMQR( 'Left''Transpose', M, NRHS, MN, A, LDA, WORK( 1 ),
331      $             B, LDB, WORK( 2*MN+1 ), LWORK-2*MN, INFO )
332       WSIZE = MAX( WSIZE, 2*MN+WORK( 2*MN+1 ) )
333 *
334 *     workspace: 2*MN+NB*NRHS.
335 *
336 *     B(1:RANK,1:NRHS) := inv(T11) * B(1:RANK,1:NRHS)
337 *
338       CALL DTRSM( 'Left''Upper''No transpose''Non-unit', RANK,
339      $            NRHS, ONE, A, LDA, B, LDB )
340 *
341       DO 40 J = 1, NRHS
342          DO 30 I = RANK + 1, N
343             B( I, J ) = ZERO
344    30    CONTINUE
345    40 CONTINUE
346 *
347 *     B(1:N,1:NRHS) := Y**T * B(1:N,1:NRHS)
348 *
349       IF( RANK.LT.N ) THEN
350          CALL DORMRZ( 'Left''Transpose', N, NRHS, RANK, N-RANK, A,
351      $                LDA, WORK( MN+1 ), B, LDB, WORK( 2*MN+1 ),
352      $                LWORK-2*MN, INFO )
353       END IF
354 *
355 *     workspace: 2*MN+NRHS.
356 *
357 *     B(1:N,1:NRHS) := P * B(1:N,1:NRHS)
358 *
359       DO 60 J = 1, NRHS
360          DO 50 I = 1, N
361             WORK( JPVT( I ) ) = B( I, J )
362    50    CONTINUE
363          CALL DCOPY( N, WORK( 1 ), 1, B( 1, J ), 1 )
364    60 CONTINUE
365 *
366 *     workspace: N.
367 *
368 *     Undo scaling
369 *
370       IF( IASCL.EQ.1 ) THEN
371          CALL DLASCL( 'G'00, ANRM, SMLNUM, N, NRHS, B, LDB, INFO )
372          CALL DLASCL( 'U'00, SMLNUM, ANRM, RANK, RANK, A, LDA,
373      $                INFO )
374       ELSE IF( IASCL.EQ.2 ) THEN
375          CALL DLASCL( 'G'00, ANRM, BIGNUM, N, NRHS, B, LDB, INFO )
376          CALL DLASCL( 'U'00, BIGNUM, ANRM, RANK, RANK, A, LDA,
377      $                INFO )
378       END IF
379       IF( IBSCL.EQ.1 ) THEN
380          CALL DLASCL( 'G'00, SMLNUM, BNRM, N, NRHS, B, LDB, INFO )
381       ELSE IF( IBSCL.EQ.2 ) THEN
382          CALL DLASCL( 'G'00, BIGNUM, BNRM, N, NRHS, B, LDB, INFO )
383       END IF
384 *
385    70 CONTINUE
386       WORK( 1 ) = LWKOPT
387 *
388       RETURN
389 *
390 *     End of DGELSY
391 *
392       END