dsposv.f (flens/lapack/interface/ref

  1       SUBROUTINE DSPOSV( UPLO, N, NRHS, A, LDA, B, LDB, X, LDX, WORK,

  2      $                   SWORK, ITER, INFO )

  3 *

  4 *  -- LAPACK PROTOTYPE driver routine (version 3.3.1) --

  5 *     Univ. of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..

  6 *  -- April 2011                                                      --

  7 *

  8 *     ..

  9 *     .. Scalar Arguments ..

 10       CHARACTER          UPLO

 11       INTEGER            INFO, ITER, LDA, LDB, LDX, N, NRHS

 12 *     ..

 13 *     .. Array Arguments ..

 14       REAL               SWORK( * )

 15       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ), WORK( N, * ),

 16      $                   X( LDX, * )

 17 *     ..

 18 *

 19 *  Purpose

 20 *  =======

 21 *

 22 *  DSPOSV computes the solution to a real system of linear equations

 23 *     A * X = B,

 24 *  where A is an N-by-N symmetric positive definite matrix and X and B

 25 *  are N-by-NRHS matrices.

 26 *

 27 *  DSPOSV first attempts to factorize the matrix in SINGLE PRECISION

 28 *  and use this factorization within an iterative refinement procedure

 29 *  to produce a solution with DOUBLE PRECISION normwise backward error

 30 *  quality (see below). If the approach fails the method switches to a

 31 *  DOUBLE PRECISION factorization and solve.

 32 *

 33 *  The iterative refinement is not going to be a winning strategy if

 34 *  the ratio SINGLE PRECISION performance over DOUBLE PRECISION

 35 *  performance is too small. A reasonable strategy should take the

 36 *  number of right-hand sides and the size of the matrix into account.

 37 *  This might be done with a call to ILAENV in the future. Up to now, we

 38 *  always try iterative refinement.

 39 *

 40 *  The iterative refinement process is stopped if

 41 *      ITER > ITERMAX

 42 *  or for all the RHS we have:

 43 *      RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX

 44 *  where

 45 *      o ITER is the number of the current iteration in the iterative

 46 *        refinement process

 47 *      o RNRM is the infinity-norm of the residual

 48 *      o XNRM is the infinity-norm of the solution

 49 *      o ANRM is the infinity-operator-norm of the matrix A

 50 *      o EPS is the machine epsilon returned by DLAMCH('Epsilon')

 51 *  The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00

 52 *  respectively.

 53 *

 54 *  Arguments

 55 *  =========

 56 *

 57 *  UPLO    (input) CHARACTER*1

 58 *          = 'U':  Upper triangle of A is stored;

 59 *          = 'L':  Lower triangle of A is stored.

 60 *

 61 *  N       (input) INTEGER

 62 *          The number of linear equations, i.e., the order of the

 63 *          matrix A.  N >= 0.

 64 *

 65 *  NRHS    (input) INTEGER

 66 *          The number of right hand sides, i.e., the number of columns

 67 *          of the matrix B.  NRHS >= 0.

 68 *

 69 *  A       (input/output) DOUBLE PRECISION array,

 70 *          dimension (LDA,N)

 71 *          On entry, the symmetric matrix A.  If UPLO = 'U', the leading

 72 *          N-by-N upper triangular part of A contains the upper

 73 *          triangular part of the matrix A, and the strictly lower

 74 *          triangular part of A is not referenced.  If UPLO = 'L', the

 75 *          leading N-by-N lower triangular part of A contains the lower

 76 *          triangular part of the matrix A, and the strictly upper

 77 *          triangular part of A is not referenced.

 78 *          On exit, if iterative refinement has been successfully used

 79 *          (INFO.EQ.0 and ITER.GE.0, see description below), then A is

 80 *          unchanged, if double precision factorization has been used

 81 *          (INFO.EQ.0 and ITER.LT.0, see description below), then the

 82 *          array A contains the factor U or L from the Cholesky

 83 *          factorization A = U**T*U or A = L*L**T.

 84 *

 85 *

 86 *  LDA     (input) INTEGER

 87 *          The leading dimension of the array A.  LDA >= max(1,N).

 88 *

 89 *  B       (input) DOUBLE PRECISION array, dimension (LDB,NRHS)

 90 *          The N-by-NRHS right hand side matrix B.

 91 *

 92 *  LDB     (input) INTEGER

 93 *          The leading dimension of the array B.  LDB >= max(1,N).

 94 *

 95 *  X       (output) DOUBLE PRECISION array, dimension (LDX,NRHS)

 96 *          If INFO = 0, the N-by-NRHS solution matrix X.

 97 *

 98 *  LDX     (input) INTEGER

 99 *          The leading dimension of the array X.  LDX >= max(1,N).

100 *

101 *  WORK    (workspace) DOUBLE PRECISION array, dimension (N,NRHS)

102 *          This array is used to hold the residual vectors.

103 *

104 *  SWORK   (workspace) REAL array, dimension (N*(N+NRHS))

105 *          This array is used to use the single precision matrix and the

106 *          right-hand sides or solutions in single precision.

107 *

108 *  ITER    (output) INTEGER

109 *          < 0: iterative refinement has failed, double precision

110 *               factorization has been performed

111 *               -1 : the routine fell back to full precision for

112 *                    implementation- or machine-specific reasons

113 *               -2 : narrowing the precision induced an overflow,

114 *                    the routine fell back to full precision

115 *               -3 : failure of SPOTRF

116 *               -31: stop the iterative refinement after the 30th

117 *                    iterations

118 *          > 0: iterative refinement has been sucessfully used.

119 *               Returns the number of iterations

120 *

121 *  INFO    (output) INTEGER

122 *          = 0:  successful exit

123 *          < 0:  if INFO = -i, the i-th argument had an illegal value

124 *          > 0:  if INFO = i, the leading minor of order i of (DOUBLE

125 *                PRECISION) A is not positive definite, so the

126 *                factorization could not be completed, and the solution

127 *                has not been computed.

128 *

129 *  =====================================================================

130 *

131 *     .. Parameters ..

132       LOGICAL            DOITREF

133       PARAMETER          ( DOITREF = .TRUE. )

134 *

135       INTEGER            ITERMAX

136       PARAMETER          ( ITERMAX = 30 )

137 *

138       DOUBLE PRECISION   BWDMAX

139       PARAMETER          ( BWDMAX = 1.0E+00 )

140 *

141       DOUBLE PRECISION   NEGONE, ONE

142       PARAMETER          ( NEGONE = -1.0D+0, ONE = 1.0D+0 )

143 *

144 *     .. Local Scalars ..

145       INTEGER            I, IITER, PTSA, PTSX

146       DOUBLE PRECISION   ANRM, CTE, EPS, RNRM, XNRM

147 *

148 *     .. External Subroutines ..

149       EXTERNAL           DAXPY, DSYMM, DLACPY, DLAT2S, DLAG2S, SLAG2D,

150      $                   SPOTRF, SPOTRS, XERBLA

151 *     ..

152 *     .. External Functions ..

153       INTEGER            IDAMAX

154       DOUBLE PRECISION   DLAMCH, DLANSY

155       LOGICAL            LSAME

156       EXTERNAL           IDAMAX, DLAMCH, DLANSY, LSAME

157 *     ..

158 *     .. Intrinsic Functions ..

159       INTRINSIC          ABS, DBLE, MAX, SQRT

160 *     ..

161 *     .. Executable Statements ..

162 *

163       INFO = 0

164       ITER = 0

165 *

166 *     Test the input parameters.

167 *

168       IF( .NOT.LSAME( UPLO, 'U' ) .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN

169          INFO = -1

170       ELSE IF( N.LT.0 ) THEN

171          INFO = -2

172       ELSE IF( NRHS.LT.0 ) THEN

173          INFO = -3

174       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN

175          INFO = -5

176       ELSE IF( LDB.LT.MAX( 1, N ) ) THEN

177          INFO = -7

178       ELSE IF( LDX.LT.MAX( 1, N ) ) THEN

179          INFO = -9

180       END IF

181       IF( INFO.NE.0 ) THEN

182          CALL XERBLA( 'DSPOSV', -INFO )

183          RETURN

184       END IF

185 *

186 *     Quick return if (N.EQ.0).

187 *

188       IF( N.EQ.0 )

189      $   RETURN

190 *

191 *     Skip single precision iterative refinement if a priori slower

192 *     than double precision factorization.

193 *

194       IF( .NOT.DOITREF ) THEN

195          ITER = -1

196          GO TO 40

197       END IF

198 *

199 *     Compute some constants.

200 *

201       ANRM = DLANSY( 'I', UPLO, N, A, LDA, WORK )

202       EPS = DLAMCH( 'Epsilon' )

203       CTE = ANRM*EPS*SQRT( DBLE( N ) )*BWDMAX

204 *

205 *     Set the indices PTSA, PTSX for referencing SA and SX in SWORK.

206 *

207       PTSA = 1

208       PTSX = PTSA + N*N

209 *

210 *     Convert B from double precision to single precision and store the

211 *     result in SX.

212 *

213       CALL DLAG2S( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO )

214 *

215       IF( INFO.NE.0 ) THEN

216          ITER = -2

217          GO TO 40

218       END IF

219 *

220 *     Convert A from double precision to single precision and store the

221 *     result in SA.

222 *

223       CALL DLAT2S( UPLO, N, A, LDA, SWORK( PTSA ), N, INFO )

224 *

225       IF( INFO.NE.0 ) THEN

226          ITER = -2

227          GO TO 40

228       END IF

229 *

230 *     Compute the Cholesky factorization of SA.

231 *

232       CALL SPOTRF( UPLO, N, SWORK( PTSA ), N, INFO )

233 *

234       IF( INFO.NE.0 ) THEN

235          ITER = -3

236          GO TO 40

237       END IF

238 *

239 *     Solve the system SA*SX = SB.

240 *

241       CALL SPOTRS( UPLO, N, NRHS, SWORK( PTSA ), N, SWORK( PTSX ), N,

242      $             INFO )

243 *

244 *     Convert SX back to double precision

245 *

246       CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO )

247 *

248 *     Compute R = B - AX (R is WORK).

249 *

250       CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )

251 *

252       CALL DSYMM( 'Left', UPLO, N, NRHS, NEGONE, A, LDA, X, LDX, ONE,

253      $            WORK, N )

254 *

255 *     Check whether the NRHS normwise backward errors satisfy the

256 *     stopping criterion. If yes, set ITER=0 and return.

257 *

258       DO I = 1, NRHS

259          XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )

260          RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )

261          IF( RNRM.GT.XNRM*CTE )

262      $      GO TO 10

263       END DO

264 *

265 *     If we are here, the NRHS normwise backward errors satisfy the

266 *     stopping criterion. We are good to exit.

267 *

268       ITER = 0

269       RETURN

270 *

271    10 CONTINUE

272 *

273       DO 30 IITER = 1, ITERMAX

274 *

275 *        Convert R (in WORK) from double precision to single precision

276 *        and store the result in SX.

277 *

278          CALL DLAG2S( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO )

279 *

280          IF( INFO.NE.0 ) THEN

281             ITER = -2

282             GO TO 40

283          END IF

284 *

285 *        Solve the system SA*SX = SR.

286 *

287          CALL SPOTRS( UPLO, N, NRHS, SWORK( PTSA ), N, SWORK( PTSX ), N,

288      $                INFO )

289 *

290 *        Convert SX back to double precision and update the current

291 *        iterate.

292 *

293          CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO )

294 *

295          DO I = 1, NRHS

296             CALL DAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 )

297          END DO

298 *

299 *        Compute R = B - AX (R is WORK).

300 *

301          CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )

302 *

303          CALL DSYMM( 'L', UPLO, N, NRHS, NEGONE, A, LDA, X, LDX, ONE,

304      $               WORK, N )

305 *

306 *        Check whether the NRHS normwise backward errors satisfy the

307 *        stopping criterion. If yes, set ITER=IITER>0 and return.

308 *

309          DO I = 1, NRHS

310             XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )

311             RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )

312             IF( RNRM.GT.XNRM*CTE )

313      $         GO TO 20

314          END DO

315 *

316 *        If we are here, the NRHS normwise backward errors satisfy the

317 *        stopping criterion, we are good to exit.

318 *

319          ITER = IITER

320 *

321          RETURN

322 *

323    20    CONTINUE

324 *

325    30 CONTINUE

326 *

327 *     If we are at this place of the code, this is because we have

328 *     performed ITER=ITERMAX iterations and never satisified the

329 *     stopping criterion, set up the ITER flag accordingly and follow

330 *     up on double precision routine.

331 *

332       ITER = -ITERMAX - 1

333 *

334    40 CONTINUE

335 *

336 *     Single-precision iterative refinement failed to converge to a

337 *     satisfactory solution, so we resort to double precision.

338 *

339       CALL DPOTRF( UPLO, N, A, LDA, INFO )

340 *

341       IF( INFO.NE.0 )

342      $   RETURN

343 *

344       CALL DLACPY( 'All', N, NRHS, B, LDB, X, LDX )

345       CALL DPOTRS( UPLO, N, NRHS, A, LDA, X, LDX, INFO )

346 *

347       RETURN

348 *

349 *     End of DSPOSV.

350 *

351       END