1       SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
  2      $                   SWORK, ITER, INFO )
  3 *
  4 *  -- LAPACK PROTOTYPE driver routine (version 3.3.1) --
  5 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  6 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  7 *  -- April 2011                                                      --
  8 *
  9 *     ..
 10 *     .. Scalar Arguments ..
 11       INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS
 12 *     ..
 13 *     .. Array Arguments ..
 14       INTEGER            IPIV( * )
 15       REAL               SWORK( * )
 16       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),
 17      $                   X( LDX, * )
 18 *     ..
 19 *
 20 *  Purpose
 21 *  =======
 22 *
 23 *  DSGESV computes the solution to a real system of linear equations
 24 *     A * X = B,
 25 *  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
 26 *
 27 *  DSGESV first attempts to factorize the matrix in SINGLE PRECISION
 28 *  and use this factorization within an iterative refinement procedure
 29 *  to produce a solution with DOUBLE PRECISION normwise backward error
 30 *  quality (see below). If the approach fails the method switches to a
 31 *  DOUBLE PRECISION factorization and solve.
 32 *
 33 *  The iterative refinement is not going to be a winning strategy if
 34 *  the ratio SINGLE PRECISION performance over DOUBLE PRECISION
 35 *  performance is too small. A reasonable strategy should take the
 36 *  number of right-hand sides and the size of the matrix into account.
 37 *  This might be done with a call to ILAENV in the future. Up to now, we
 38 *  always try iterative refinement.
 39 *
 40 *  The iterative refinement process is stopped if
 41 *      ITER > ITERMAX
 42 *  or for all the RHS we have:
 43 *      RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
 44 *  where
 45 *      o ITER is the number of the current iteration in the iterative
 46 *        refinement process
 47 *      o RNRM is the infinity-norm of the residual
 48 *      o XNRM is the infinity-norm of the solution
 49 *      o ANRM is the infinity-operator-norm of the matrix A
 50 *      o EPS is the machine epsilon returned by DLAMCH('Epsilon')
 51 *  The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00
 52 *  respectively.
 53 *
 54 *  Arguments
 55 *  =========
 56 *
 57 *  N       (input) INTEGER
 58 *          The number of linear equations, i.e., the order of the
 59 *          matrix A.  N >= 0.
 60 *
 61 *  NRHS    (input) INTEGER
 62 *          The number of right hand sides, i.e., the number of columns
 63 *          of the matrix B.  NRHS >= 0.
 64 *
 65 *  A       (input/output) DOUBLE PRECISION array,
 66 *          dimension (LDA,N)
 67 *          On entry, the N-by-N coefficient matrix A.
 68 *          On exit, if iterative refinement has been successfully used
 69 *          (INFO.EQ.0 and ITER.GE.0, see description below), then A is
 70 *          unchanged, if double precision factorization has been used
 71 *          (INFO.EQ.0 and ITER.LT.0, see description below), then the
 72 *          array A contains the factors L and U from the factorization
 73 *          A = P*L*U; the unit diagonal elements of L are not stored.
 74 *
 75 *  LDA     (input) INTEGER
 76 *          The leading dimension of the array A.  LDA >= max(1,N).
 77 *
 78 *  IPIV    (output) INTEGER array, dimension (N)
 79 *          The pivot indices that define the permutation matrix P;
 80 *          row i of the matrix was interchanged with row IPIV(i).
 81 *          Corresponds either to the single precision factorization
 82 *          (if INFO.EQ.0 and ITER.GE.0) or the double precision
 83 *          factorization (if INFO.EQ.0 and ITER.LT.0).
 84 *
 85 *  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)
 86 *          The N-by-NRHS right hand side matrix B.
 87 *
 88 *  LDB     (input) INTEGER
 89 *          The leading dimension of the array B.  LDB >= max(1,N).
 90 *
 91 *  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)
 92 *          If INFO = 0, the N-by-NRHS solution matrix X.
 93 *
 94 *  LDX     (input) INTEGER
 95 *          The leading dimension of the array X.  LDX >= max(1,N).
 96 *
 97 *  WORK    (workspace) DOUBLE PRECISION array, dimension (N,NRHS)
 98 *          This array is used to hold the residual vectors.
 99 *
100 *  SWORK   (workspace) REAL array, dimension (N*(N+NRHS))
101 *          This array is used to use the single precision matrix and the
102 *          right-hand sides or solutions in single precision.
103 *
104 *  ITER    (output) INTEGER
105 *          < 0: iterative refinement has failed, double precision
106 *               factorization has been performed
107 *               -1 : the routine fell back to full precision for
108 *                    implementation- or machine-specific reasons
109 *               -2 : narrowing the precision induced an overflow,
110 *                    the routine fell back to full precision
111 *               -3 : failure of SGETRF
112 *               -31: stop the iterative refinement after the 30th
113 *                    iterations
114 *          > 0: iterative refinement has been sucessfully used.
115 *               Returns the number of iterations
116 *
117 *  INFO    (output) INTEGER
118 *          = 0:  successful exit
119 *          < 0:  if INFO = -i, the i-th argument had an illegal value
120 *          > 0:  if INFO = i, U(i,i) computed in DOUBLE PRECISION is
121 *                exactly zero.  The factorization has been completed,
122 *                but the factor U is exactly singular, so the solution
123 *                could not be computed.
124 *
125 *  =====================================================================
126 *
127 *     .. Parameters ..
128       LOGICAL            DOITREF
129       PARAMETER          ( DOITREF = .TRUE. )
130 *
131       INTEGER            ITERMAX
132       PARAMETER          ( ITERMAX = 30 )
133 *
134       DOUBLE PRECISION   BWDMAX
135       PARAMETER          ( BWDMAX = 1.0E+00 )
136 *
137       DOUBLE PRECISION   NEGONE, ONE
138       PARAMETER          ( NEGONE = -1.0D+0, ONE = 1.0D+0 )
139 *
140 *     .. Local Scalars ..
141       INTEGER            I, IITER, PTSA, PTSX
142       DOUBLE PRECISION   ANRM, CTE, EPS, RNRM, XNRM
143 *
144 *     .. External Subroutines ..
145       EXTERNAL           DAXPY, DGEMM, DLACPY, DLAG2S, SLAG2D, SGETRF,
146      $                   SGETRS, XERBLA
147 *     ..
148 *     .. External Functions ..
149       INTEGER            IDAMAX
150       DOUBLE PRECISION   DLAMCH, DLANGE
151       EXTERNAL           IDAMAX, DLAMCH, DLANGE
152 *     ..
153 *     .. Intrinsic Functions ..
154       INTRINSIC          ABSDBLEMAXSQRT
155 *     ..
156 *     .. Executable Statements ..
157 *
158       INFO = 0
159       ITER = 0
160 *
161 *     Test the input parameters.
162 *
163       IF( N.LT.0 ) THEN
164          INFO = -1
165       ELSE IF( NRHS.LT.0 ) THEN
166          INFO = -2
167       ELSE IF( LDA.LT.MAX1, N ) ) THEN
168          INFO = -4
169       ELSE IF( LDB.LT.MAX1, N ) ) THEN
170          INFO = -7
171       ELSE IF( LDX.LT.MAX1, N ) ) THEN
172          INFO = -9
173       END IF
174       IF( INFO.NE.0 ) THEN
175          CALL XERBLA( 'DSGESV'-INFO )
176          RETURN
177       END IF
178 *
179 *     Quick return if (N.EQ.0).
180 *
181       IF( N.EQ.0 )
182      $   RETURN
183 *
184 *     Skip single precision iterative refinement if a priori slower
185 *     than double precision factorization.
186 *
187       IF.NOT.DOITREF ) THEN
188          ITER = -1
189          GO TO 40
190       END IF
191 *
192 *     Compute some constants.
193 *
194       ANRM = DLANGE( 'I', N, N, A, LDA, WORK )
195       EPS = DLAMCH( 'Epsilon' )
196       CTE = ANRM*EPS*SQRTDBLE( N ) )*BWDMAX
197 *
198 *     Set the indices PTSA, PTSX for referencing SA and SX in SWORK.
199 *
200       PTSA = 1
201       PTSX = PTSA + N*N
202 *
203 *     Convert B from double precision to single precision and store the
204 *     result in SX.
205 *
206       CALL DLAG2S( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO )
207 *
208       IF( INFO.NE.0 ) THEN
209          ITER = -2
210          GO TO 40
211       END IF
212 *
213 *     Convert A from double precision to single precision and store the
214 *     result in SA.
215 *
216       CALL DLAG2S( N, N, A, LDA, SWORK( PTSA ), N, INFO )
217 *
218       IF( INFO.NE.0 ) THEN
219          ITER = -2
220          GO TO 40
221       END IF
222 *
223 *     Compute the LU factorization of SA.
224 *
225       CALL SGETRF( N, N, SWORK( PTSA ), N, IPIV, INFO )
226 *
227       IF( INFO.NE.0 ) THEN
228          ITER = -3
229          GO TO 40
230       END IF
231 *
232 *     Solve the system SA*SX = SB.
233 *
234       CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
235      $             SWORK( PTSX ), N, INFO )
236 *
237 *     Convert SX back to double precision
238 *
239       CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO )
240 *
241 *     Compute R = B - AX (R is WORK).
242 *
243       CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
244 *
245       CALL DGEMM( 'No Transpose''No Transpose', N, NRHS, N, NEGONE, A,
246      $            LDA, X, LDX, ONE, WORK, N )
247 *
248 *     Check whether the NRHS normwise backward errors satisfy the
249 *     stopping criterion. If yes, set ITER=0 and return.
250 *
251       DO I = 1, NRHS
252          XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
253          RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
254          IF( RNRM.GT.XNRM*CTE )
255      $      GO TO 10
256       END DO
257 *
258 *     If we are here, the NRHS normwise backward errors satisfy the
259 *     stopping criterion. We are good to exit.
260 *
261       ITER = 0
262       RETURN
263 *
264    10 CONTINUE
265 *
266       DO 30 IITER = 1, ITERMAX
267 *
268 *        Convert R (in WORK) from double precision to single precision
269 *        and store the result in SX.
270 *
271          CALL DLAG2S( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO )
272 *
273          IF( INFO.NE.0 ) THEN
274             ITER = -2
275             GO TO 40
276          END IF
277 *
278 *        Solve the system SA*SX = SR.
279 *
280          CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
281      $                SWORK( PTSX ), N, INFO )
282 *
283 *        Convert SX back to double precision and update the current
284 *        iterate.
285 *
286          CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO )
287 *
288          DO I = 1, NRHS
289             CALL DAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 )
290          END DO
291 *
292 *        Compute R = B - AX (R is WORK).
293 *
294          CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
295 *
296          CALL DGEMM( 'No Transpose''No Transpose', N, NRHS, N, NEGONE,
297      $               A, LDA, X, LDX, ONE, WORK, N )
298 *
299 *        Check whether the NRHS normwise backward errors satisfy the
300 *        stopping criterion. If yes, set ITER=IITER>0 and return.
301 *
302          DO I = 1, NRHS
303             XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
304             RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
305             IF( RNRM.GT.XNRM*CTE )
306      $         GO TO 20
307          END DO
308 *
309 *        If we are here, the NRHS normwise backward errors satisfy the
310 *        stopping criterion, we are good to exit.
311 *
312          ITER = IITER
313 *
314          RETURN
315 *
316    20    CONTINUE
317 *
318    30 CONTINUE
319 *
320 *     If we are at this place of the code, this is because we have
321 *     performed ITER=ITERMAX iterations and never satisified the
322 *     stopping criterion, set up the ITER flag accordingly and follow up
323 *     on double precision routine.
324 *
325       ITER = -ITERMAX - 1
326 *
327    40 CONTINUE
328 *
329 *     Single-precision iterative refinement failed to converge to a
330 *     satisfactory solution, so we resort to double precision.
331 *
332       CALL DGETRF( N, N, A, LDA, IPIV, INFO )
333 *
334       IF( INFO.NE.0 )
335      $   RETURN
336 *
337       CALL DLACPY( 'All', N, NRHS, B, LDB, X, LDX )
338       CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, X, LDX,
339      $             INFO )
340 *
341       RETURN
342 *
343 *     End of DSGESV.
344 *
345       END