dsgesv.f (flens/lapack/interface/ref

  1       SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,

  2      $                   SWORK, ITER, INFO )

  3 *

  4 *  -- LAPACK PROTOTYPE driver routine (version 3.3.1) --

  5 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --

  6 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--

  7 *  -- April 2011                                                      --

  8 *

  9 *     ..

 10 *     .. Scalar Arguments ..

 11       INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS

 12 *     ..

 13 *     .. Array Arguments ..

 14       INTEGER            IPIV( * )

 15       REAL               SWORK( * )

 16       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),

 17      $                   X( LDX, * )

 18 *     ..

 19 *

 20 *  Purpose

 21 *  =======

 22 *

 23 *  DSGESV computes the solution to a real system of linear equations

 24 *     A * X = B,

 25 *  where A is an N-by-N matrix and X and B are N-by-NRHS matrices.

 26 *

 27 *  DSGESV first attempts to factorize the matrix in SINGLE PRECISION

 28 *  and use this factorization within an iterative refinement procedure

 29 *  to produce a solution with DOUBLE PRECISION normwise backward error

 30 *  quality (see below). If the approach fails the method switches to a

 31 *  DOUBLE PRECISION factorization and solve.

 32 *

 33 *  The iterative refinement is not going to be a winning strategy if

 34 *  the ratio SINGLE PRECISION performance over DOUBLE PRECISION

 35 *  performance is too small. A reasonable strategy should take the

 36 *  number of right-hand sides and the size of the matrix into account.

 37 *  This might be done with a call to ILAENV in the future. Up to now, we

 38 *  always try iterative refinement.

 39 *

 40 *  The iterative refinement process is stopped if

 41 *      ITER > ITERMAX

 42 *  or for all the RHS we have:

 43 *      RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX

 44 *  where

 45 *      o ITER is the number of the current iteration in the iterative

 46 *        refinement process

 47 *      o RNRM is the infinity-norm of the residual

 48 *      o XNRM is the infinity-norm of the solution

 49 *      o ANRM is the infinity-operator-norm of the matrix A

 50 *      o EPS is the machine epsilon returned by DLAMCH('Epsilon')

 51 *  The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00

 52 *  respectively.

 53 *

 54 *  Arguments

 55 *  =========

 56 *

 57 *  N       (input) INTEGER

 58 *          The number of linear equations, i.e., the order of the

 59 *          matrix A.  N >= 0.

 60 *

 61 *  NRHS    (input) INTEGER

 62 *          The number of right hand sides, i.e., the number of columns

 63 *          of the matrix B.  NRHS >= 0.

 64 *

 65 *  A       (input/output) DOUBLE PRECISION array,

 66 *          dimension (LDA,N)

 67 *          On entry, the N-by-N coefficient matrix A.

 68 *          On exit, if iterative refinement has been successfully used

 69 *          (INFO.EQ.0 and ITER.GE.0, see description below), then A is

 70 *          unchanged, if double precision factorization has been used

 71 *          (INFO.EQ.0 and ITER.LT.0, see description below), then the

 72 *          array A contains the factors L and U from the factorization

 73 *          A = P*L*U; the unit diagonal elements of L are not stored.

 74 *

 75 *  LDA     (input) INTEGER

 76 *          The leading dimension of the array A.  LDA >= max(1,N).

 77 *

 78 *  IPIV    (output) INTEGER array, dimension (N)

 79 *          The pivot indices that define the permutation matrix P;

 80 *          row i of the matrix was interchanged with row IPIV(i).

 81 *          Corresponds either to the single precision factorization

 82 *          (if INFO.EQ.0 and ITER.GE.0) or the double precision

 83 *          factorization (if INFO.EQ.0 and ITER.LT.0).

 84 *

 85 *  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)

 86 *          The N-by-NRHS right hand side matrix B.

 87 *

 88 *  LDB     (input) INTEGER

 89 *          The leading dimension of the array B.  LDB >= max(1,N).

 90 *

 91 *  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)

 92 *          If INFO = 0, the N-by-NRHS solution matrix X.

 93 *

 94 *  LDX     (input) INTEGER

 95 *          The leading dimension of the array X.  LDX >= max(1,N).

 96 *

 97 *  WORK    (workspace) DOUBLE PRECISION array, dimension (N,NRHS)

 98 *          This array is used to hold the residual vectors.

 99 *

100 *  SWORK   (workspace) REAL array, dimension (N*(N+NRHS))

101 *          This array is used to use the single precision matrix and the

102 *          right-hand sides or solutions in single precision.

103 *

104 *  ITER    (output) INTEGER

105 *          < 0: iterative refinement has failed, double precision

106 *               factorization has been performed

107 *               -1 : the routine fell back to full precision for

108 *                    implementation- or machine-specific reasons

109 *               -2 : narrowing the precision induced an overflow,

110 *                    the routine fell back to full precision

111 *               -3 : failure of SGETRF

112 *               -31: stop the iterative refinement after the 30th

113 *                    iterations

114 *          > 0: iterative refinement has been sucessfully used.

115 *               Returns the number of iterations

116 *

117 *  INFO    (output) INTEGER

118 *          = 0:  successful exit

119 *          < 0:  if INFO = -i, the i-th argument had an illegal value

120 *          > 0:  if INFO = i, U(i,i) computed in DOUBLE PRECISION is

121 *                exactly zero.  The factorization has been completed,

122 *                but the factor U is exactly singular, so the solution

123 *                could not be computed.

124 *

125 *  =====================================================================

126 *

127 *     .. Parameters ..

128       LOGICAL            DOITREF

129       PARAMETER          ( DOITREF = .TRUE. )

130 *

131       INTEGER            ITERMAX

132       PARAMETER          ( ITERMAX = 30 )

133 *

134       DOUBLE PRECISION   BWDMAX

135       PARAMETER          ( BWDMAX = 1.0E+00 )

136 *

137       DOUBLE PRECISION   NEGONE, ONE

138       PARAMETER          ( NEGONE = -1.0D+0, ONE = 1.0D+0 )

139 *

140 *     .. Local Scalars ..

141       INTEGER            I, IITER, PTSA, PTSX

142       DOUBLE PRECISION   ANRM, CTE, EPS, RNRM, XNRM

143 *

144 *     .. External Subroutines ..

145       EXTERNAL           DAXPY, DGEMM, DLACPY, DLAG2S, SLAG2D, SGETRF,

146      $                   SGETRS, XERBLA

147 *     ..

148 *     .. External Functions ..

149       INTEGER            IDAMAX

150       DOUBLE PRECISION   DLAMCH, DLANGE

151       EXTERNAL           IDAMAX, DLAMCH, DLANGE

152 *     ..

153 *     .. Intrinsic Functions ..

154       INTRINSIC          ABS, DBLE, MAX, SQRT

155 *     ..

156 *     .. Executable Statements ..

157 *

158       INFO = 0

159       ITER = 0

160 *

161 *     Test the input parameters.

162 *

163       IF( N.LT.0 ) THEN

164          INFO = -1

165       ELSE IF( NRHS.LT.0 ) THEN

166          INFO = -2

167       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN

168          INFO = -4

169       ELSE IF( LDB.LT.MAX( 1, N ) ) THEN

170          INFO = -7

171       ELSE IF( LDX.LT.MAX( 1, N ) ) THEN

172          INFO = -9

173       END IF

174       IF( INFO.NE.0 ) THEN

175          CALL XERBLA( 'DSGESV', -INFO )

176          RETURN

177       END IF

178 *

179 *     Quick return if (N.EQ.0).

180 *

181       IF( N.EQ.0 )

182      $   RETURN

183 *

184 *     Skip single precision iterative refinement if a priori slower

185 *     than double precision factorization.

186 *

187       IF( .NOT.DOITREF ) THEN

188          ITER = -1

189          GO TO 40

190       END IF

191 *

192 *     Compute some constants.

193 *

194       ANRM = DLANGE( 'I', N, N, A, LDA, WORK )

195       EPS = DLAMCH( 'Epsilon' )

196       CTE = ANRM*EPS*SQRT( DBLE( N ) )*BWDMAX

197 *

198 *     Set the indices PTSA, PTSX for referencing SA and SX in SWORK.

199 *

200       PTSA = 1

201       PTSX = PTSA + N*N

202 *

203 *     Convert B from double precision to single precision and store the

204 *     result in SX.

205 *

206       CALL DLAG2S( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO )

207 *

208       IF( INFO.NE.0 ) THEN

209          ITER = -2

210          GO TO 40

211       END IF

212 *

213 *     Convert A from double precision to single precision and store the

214 *     result in SA.

215 *

216       CALL DLAG2S( N, N, A, LDA, SWORK( PTSA ), N, INFO )

217 *

218       IF( INFO.NE.0 ) THEN

219          ITER = -2

220          GO TO 40

221       END IF

222 *

223 *     Compute the LU factorization of SA.

224 *

225       CALL SGETRF( N, N, SWORK( PTSA ), N, IPIV, INFO )

226 *

227       IF( INFO.NE.0 ) THEN

228          ITER = -3

229          GO TO 40

230       END IF

231 *

232 *     Solve the system SA*SX = SB.

233 *

234       CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,

235      $             SWORK( PTSX ), N, INFO )

236 *

237 *     Convert SX back to double precision

238 *

239       CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO )

240 *

241 *     Compute R = B - AX (R is WORK).

242 *

243       CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )

244 *

245       CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE, A,

246      $            LDA, X, LDX, ONE, WORK, N )

247 *

248 *     Check whether the NRHS normwise backward errors satisfy the

249 *     stopping criterion. If yes, set ITER=0 and return.

250 *

251       DO I = 1, NRHS

252          XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )

253          RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )

254          IF( RNRM.GT.XNRM*CTE )

255      $      GO TO 10

256       END DO

257 *

258 *     If we are here, the NRHS normwise backward errors satisfy the

259 *     stopping criterion. We are good to exit.

260 *

261       ITER = 0

262       RETURN

263 *

264    10 CONTINUE

265 *

266       DO 30 IITER = 1, ITERMAX

267 *

268 *        Convert R (in WORK) from double precision to single precision

269 *        and store the result in SX.

270 *

271          CALL DLAG2S( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO )

272 *

273          IF( INFO.NE.0 ) THEN

274             ITER = -2

275             GO TO 40

276          END IF

277 *

278 *        Solve the system SA*SX = SR.

279 *

280          CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,

281      $                SWORK( PTSX ), N, INFO )

282 *

283 *        Convert SX back to double precision and update the current

284 *        iterate.

285 *

286          CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO )

287 *

288          DO I = 1, NRHS

289             CALL DAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 )

290          END DO

291 *

292 *        Compute R = B - AX (R is WORK).

293 *

294          CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )

295 *

296          CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE,

297      $               A, LDA, X, LDX, ONE, WORK, N )

298 *

299 *        Check whether the NRHS normwise backward errors satisfy the

300 *        stopping criterion. If yes, set ITER=IITER>0 and return.

301 *

302          DO I = 1, NRHS

303             XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )

304             RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )

305             IF( RNRM.GT.XNRM*CTE )

306      $         GO TO 20

307          END DO

308 *

309 *        If we are here, the NRHS normwise backward errors satisfy the

310 *        stopping criterion, we are good to exit.

311 *

312          ITER = IITER

313 *

314          RETURN

315 *

316    20    CONTINUE

317 *

318    30 CONTINUE

319 *

320 *     If we are at this place of the code, this is because we have

321 *     performed ITER=ITERMAX iterations and never satisified the

322 *     stopping criterion, set up the ITER flag accordingly and follow up

323 *     on double precision routine.

324 *

325       ITER = -ITERMAX - 1

326 *

327    40 CONTINUE

328 *

329 *     Single-precision iterative refinement failed to converge to a

330 *     satisfactory solution, so we resort to double precision.

331 *

332       CALL DGETRF( N, N, A, LDA, IPIV, INFO )

333 *

334       IF( INFO.NE.0 )

335      $   RETURN

336 *

337       CALL DLACPY( 'All', N, NRHS, B, LDB, X, LDX )

338       CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, X, LDX,

339      $             INFO )

340 *

341       RETURN

342 *

343 *     End of DSGESV.

344 *

345       END