dlarrf.f (flens/lapack/interface/ref

  1       SUBROUTINE DLARRF( N, D, L, LD, CLSTRT, CLEND,

  2      $                   W, WGAP, WERR,

  3      $                   SPDIAM, CLGAPL, CLGAPR, PIVMIN, SIGMA,

  4      $                   DPLUS, LPLUS, WORK, INFO )

  5 *

  6 *  -- LAPACK auxiliary routine (version 3.2.2) --

  7 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --

  8 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--

  9 *     June 2010

 10 **

 11 *     .. Scalar Arguments ..

 12       INTEGER            CLSTRT, CLEND, INFO, N

 13       DOUBLE PRECISION   CLGAPL, CLGAPR, PIVMIN, SIGMA, SPDIAM

 14 *     ..

 15 *     .. Array Arguments ..

 16       DOUBLE PRECISION   D( * ), DPLUS( * ), L( * ), LD( * ),

 17      $          LPLUS( * ), W( * ), WGAP( * ), WERR( * ), WORK( * )

 18 *     ..

 19 *

 20 *  Purpose

 21 *  =======

 22 *

 23 *  Given the initial representation L D L^T and its cluster of close

 24 *  eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ...

 25 *  W( CLEND ), DLARRF finds a new relatively robust representation

 26 *  L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the

 27 *  eigenvalues of L(+) D(+) L(+)^T is relatively isolated.

 28 *

 29 *  Arguments

 30 *  =========

 31 *

 32 *  N       (input) INTEGER

 33 *          The order of the matrix (subblock, if the matrix splitted).

 34 *

 35 *  D       (input) DOUBLE PRECISION array, dimension (N)

 36 *          The N diagonal elements of the diagonal matrix D.

 37 *

 38 *  L       (input) DOUBLE PRECISION array, dimension (N-1)

 39 *          The (N-1) subdiagonal elements of the unit bidiagonal

 40 *          matrix L.

 41 *

 42 *  LD      (input) DOUBLE PRECISION array, dimension (N-1)

 43 *          The (N-1) elements L(i)*D(i).

 44 *

 45 *  CLSTRT  (input) INTEGER

 46 *          The index of the first eigenvalue in the cluster.

 47 *

 48 *  CLEND   (input) INTEGER

 49 *          The index of the last eigenvalue in the cluster.

 50 *

 51 *  W       (input) DOUBLE PRECISION array, dimension

 52 *          dimension is >=  (CLEND-CLSTRT+1)

 53 *          The eigenvalue APPROXIMATIONS of L D L^T in ascending order.

 54 *          W( CLSTRT ) through W( CLEND ) form the cluster of relatively

 55 *          close eigenalues.

 56 *

 57 *  WGAP    (input/output) DOUBLE PRECISION array, dimension

 58 *          dimension is >=  (CLEND-CLSTRT+1)

 59 *          The separation from the right neighbor eigenvalue in W.

 60 *

 61 *  WERR    (input) DOUBLE PRECISION array, dimension

 62 *          dimension is  >=  (CLEND-CLSTRT+1)

 63 *          WERR contain the semiwidth of the uncertainty

 64 *          interval of the corresponding eigenvalue APPROXIMATION in W

 65 *

 66 *  SPDIAM  (input) DOUBLE PRECISION

 67 *          estimate of the spectral diameter obtained from the

 68 *          Gerschgorin intervals

 69 *

 70 *  CLGAPL  (input) DOUBLE PRECISION

 71 *

 72 *  CLGAPR  (input) DOUBLE PRECISION

 73 *          absolute gap on each end of the cluster.

 74 *          Set by the calling routine to protect against shifts too close

 75 *          to eigenvalues outside the cluster.

 76 *

 77 *  PIVMIN  (input) DOUBLE PRECISION

 78 *          The minimum pivot allowed in the Sturm sequence.

 79 *

 80 *  SIGMA   (output) DOUBLE PRECISION

 81 *          The shift used to form L(+) D(+) L(+)^T.

 82 *

 83 *  DPLUS   (output) DOUBLE PRECISION array, dimension (N)

 84 *          The N diagonal elements of the diagonal matrix D(+).

 85 *

 86 *  LPLUS   (output) DOUBLE PRECISION array, dimension (N-1)

 87 *          The first (N-1) elements of LPLUS contain the subdiagonal

 88 *          elements of the unit bidiagonal matrix L(+).

 89 *

 90 *  WORK    (workspace) DOUBLE PRECISION array, dimension (2*N)

 91 *          Workspace.

 92 *

 93 *  INFO    (output) INTEGER

 94 *          Signals processing OK (=0) or failure (=1)

 95 *

 96 *  Further Details

 97 *  ===============

 98 *

 99 *  Based on contributions by

100 *     Beresford Parlett, University of California, Berkeley, USA

101 *     Jim Demmel, University of California, Berkeley, USA

102 *     Inderjit Dhillon, University of Texas, Austin, USA

103 *     Osni Marques, LBNL/NERSC, USA

104 *     Christof Voemel, University of California, Berkeley, USA

105 *

106 *  =====================================================================

107 *

108 *     .. Parameters ..

109       DOUBLE PRECISION   FOUR, MAXGROWTH1, MAXGROWTH2, ONE, QUART, TWO,

110      $                   ZERO

111       PARAMETER          ( ZERO = 0.0D0, ONE = 1.0D0, TWO = 2.0D0,

112      $                     FOUR = 4.0D0, QUART = 0.25D0,

113      $                     MAXGROWTH1 = 8.D0,

114      $                     MAXGROWTH2 = 8.D0 )

115 *     ..

116 *     .. Local Scalars ..

117       LOGICAL   DORRR1, FORCER, NOFAIL, SAWNAN1, SAWNAN2, TRYRRR1

118       INTEGER            I, INDX, KTRY, KTRYMAX, SLEFT, SRIGHT, SHIFT

119       PARAMETER          ( KTRYMAX = 1, SLEFT = 1, SRIGHT = 2 )

120       DOUBLE PRECISION   AVGAP, BESTSHIFT, CLWDTH, EPS, FACT, FAIL,

121      $                   FAIL2, GROWTHBOUND, LDELTA, LDMAX, LSIGMA,

122      $                   MAX1, MAX2, MINGAP, OLDP, PROD, RDELTA, RDMAX,

123      $                   RRR1, RRR2, RSIGMA, S, SMLGROWTH, TMP, ZNM2

124 *     ..

125 *     .. External Functions ..

126       LOGICAL DISNAN

127       DOUBLE PRECISION   DLAMCH

128       EXTERNAL           DISNAN, DLAMCH

129 *     ..

130 *     .. External Subroutines ..

131       EXTERNAL           DCOPY

132 *     ..

133 *     .. Intrinsic Functions ..

134       INTRINSIC          ABS

135 *     ..

136 *     .. Executable Statements ..

137 *

138       INFO = 0

139       FACT = DBLE(2**KTRYMAX)

140       EPS = DLAMCH( 'Precision' )

141       SHIFT = 0

142       FORCER = .FALSE.

143 

144 

145 *     Note that we cannot guarantee that for any of the shifts tried,

146 *     the factorization has a small or even moderate element growth.

147 *     There could be Ritz values at both ends of the cluster and despite

148 *     backing off, there are examples where all factorizations tried

149 *     (in IEEE mode, allowing zero pivots & infinities) have INFINITE

150 *     element growth.

151 *     For this reason, we should use PIVMIN in this subroutine so that at

152 *     least the L D L^T factorization exists. It can be checked afterwards

153 *     whether the element growth caused bad residuals/orthogonality.

154 

155 *     Decide whether the code should accept the best among all

156 *     representations despite large element growth or signal INFO=1

157       NOFAIL = .TRUE.

158 *

159 

160 *     Compute the average gap length of the cluster

161       CLWDTH = ABS(W(CLEND)-W(CLSTRT)) + WERR(CLEND) + WERR(CLSTRT)

162       AVGAP = CLWDTH / DBLE(CLEND-CLSTRT)

163       MINGAP = MIN(CLGAPL, CLGAPR)

164 *     Initial values for shifts to both ends of cluster

165       LSIGMA = MIN(W( CLSTRT ),W( CLEND )) - WERR( CLSTRT )

166       RSIGMA = MAX(W( CLSTRT ),W( CLEND )) + WERR( CLEND )

167 

168 *     Use a small fudge to make sure that we really shift to the outside

169       LSIGMA = LSIGMA - ABS(LSIGMA)* FOUR * EPS

170       RSIGMA = RSIGMA + ABS(RSIGMA)* FOUR * EPS

171 

172 *     Compute upper bounds for how much to back off the initial shifts

173       LDMAX = QUART * MINGAP + TWO * PIVMIN

174       RDMAX = QUART * MINGAP + TWO * PIVMIN

175 

176       LDELTA = MAX(AVGAP,WGAP( CLSTRT ))/FACT

177       RDELTA = MAX(AVGAP,WGAP( CLEND-1 ))/FACT

178 *

179 *     Initialize the record of the best representation found

180 *

181       S = DLAMCH( 'S' )

182       SMLGROWTH = ONE / S

183       FAIL = DBLE(N-1)*MINGAP/(SPDIAM*EPS)

184       FAIL2 = DBLE(N-1)*MINGAP/(SPDIAM*SQRT(EPS))

185       BESTSHIFT = LSIGMA

186 *

187 *     while (KTRY <= KTRYMAX)

188       KTRY = 0

189       GROWTHBOUND = MAXGROWTH1*SPDIAM

190 

191  5    CONTINUE

192       SAWNAN1 = .FALSE.

193       SAWNAN2 = .FALSE.

194 *     Ensure that we do not back off too much of the initial shifts

195       LDELTA = MIN(LDMAX,LDELTA)

196       RDELTA = MIN(RDMAX,RDELTA)

197 

198 *     Compute the element growth when shifting to both ends of the cluster

199 *     accept the shift if there is no element growth at one of the two ends

200 

201 *     Left end

202       S = -LSIGMA

203       DPLUS( 1 ) = D( 1 ) + S

204       IF(ABS(DPLUS(1)).LT.PIVMIN) THEN

205          DPLUS(1) = -PIVMIN

206 *        Need to set SAWNAN1 because refined RRR test should not be used

207 *        in this case

208          SAWNAN1 = .TRUE.

209       ENDIF

210       MAX1 = ABS( DPLUS( 1 ) )

211       DO 6 I = 1, N - 1

212          LPLUS( I ) = LD( I ) / DPLUS( I )

213          S = S*LPLUS( I )*L( I ) - LSIGMA

214          DPLUS( I+1 ) = D( I+1 ) + S

215          IF(ABS(DPLUS(I+1)).LT.PIVMIN) THEN

216             DPLUS(I+1) = -PIVMIN

217 *           Need to set SAWNAN1 because refined RRR test should not be used

218 *           in this case

219             SAWNAN1 = .TRUE.

220          ENDIF

221          MAX1 = MAX( MAX1,ABS(DPLUS(I+1)) )

222  6    CONTINUE

223       SAWNAN1 = SAWNAN1 .OR.  DISNAN( MAX1 )

224 

225       IF( FORCER .OR.

226      $   (MAX1.LE.GROWTHBOUND .AND. .NOT.SAWNAN1 ) ) THEN

227          SIGMA = LSIGMA

228          SHIFT = SLEFT

229          GOTO 100

230       ENDIF

231 

232 *     Right end

233       S = -RSIGMA

234       WORK( 1 ) = D( 1 ) + S

235       IF(ABS(WORK(1)).LT.PIVMIN) THEN

236          WORK(1) = -PIVMIN

237 *        Need to set SAWNAN2 because refined RRR test should not be used

238 *        in this case

239          SAWNAN2 = .TRUE.

240       ENDIF

241       MAX2 = ABS( WORK( 1 ) )

242       DO 7 I = 1, N - 1

243          WORK( N+I ) = LD( I ) / WORK( I )

244          S = S*WORK( N+I )*L( I ) - RSIGMA

245          WORK( I+1 ) = D( I+1 ) + S

246          IF(ABS(WORK(I+1)).LT.PIVMIN) THEN

247             WORK(I+1) = -PIVMIN

248 *           Need to set SAWNAN2 because refined RRR test should not be used

249 *           in this case

250             SAWNAN2 = .TRUE.

251          ENDIF

252          MAX2 = MAX( MAX2,ABS(WORK(I+1)) )

253  7    CONTINUE

254       SAWNAN2 = SAWNAN2 .OR.  DISNAN( MAX2 )

255 

256       IF( FORCER .OR.

257      $   (MAX2.LE.GROWTHBOUND .AND. .NOT.SAWNAN2 ) ) THEN

258          SIGMA = RSIGMA

259          SHIFT = SRIGHT

260          GOTO 100

261       ENDIF

262 *     If we are at this point, both shifts led to too much element growth

263 

264 *     Record the better of the two shifts (provided it didn't lead to NaN)

265       IF(SAWNAN1.AND.SAWNAN2) THEN

266 *        both MAX1 and MAX2 are NaN

267          GOTO 50

268       ELSE

269          IF( .NOT.SAWNAN1 ) THEN

270             INDX = 1

271             IF(MAX1.LE.SMLGROWTH) THEN

272                SMLGROWTH = MAX1

273                BESTSHIFT = LSIGMA

274             ENDIF

275          ENDIF

276          IF( .NOT.SAWNAN2 ) THEN

277             IF(SAWNAN1 .OR. MAX2.LE.MAX1) INDX = 2

278             IF(MAX2.LE.SMLGROWTH) THEN

279                SMLGROWTH = MAX2

280                BESTSHIFT = RSIGMA

281             ENDIF

282          ENDIF

283       ENDIF

284 

285 *     If we are here, both the left and the right shift led to

286 *     element growth. If the element growth is moderate, then

287 *     we may still accept the representation, if it passes a

288 *     refined test for RRR. This test supposes that no NaN occurred.

289 *     Moreover, we use the refined RRR test only for isolated clusters.

290       IF((CLWDTH.LT.MINGAP/DBLE(128)) .AND.

291      $   (MIN(MAX1,MAX2).LT.FAIL2)

292      $  .AND.(.NOT.SAWNAN1).AND.(.NOT.SAWNAN2)) THEN

293          DORRR1 = .TRUE.

294       ELSE

295          DORRR1 = .FALSE.

296       ENDIF

297       TRYRRR1 = .TRUE.

298       IF( TRYRRR1 .AND. DORRR1 ) THEN

299       IF(INDX.EQ.1) THEN

300          TMP = ABS( DPLUS( N ) )

301          ZNM2 = ONE

302          PROD = ONE

303          OLDP = ONE

304          DO 15 I = N-1, 1, -1

305             IF( PROD .LE. EPS ) THEN

306                PROD =

307      $         ((DPLUS(I+1)*WORK(N+I+1))/(DPLUS(I)*WORK(N+I)))*OLDP

308             ELSE

309                PROD = PROD*ABS(WORK(N+I))

310             END IF

311             OLDP = PROD

312             ZNM2 = ZNM2 + PROD**2

313             TMP = MAX( TMP, ABS( DPLUS( I ) * PROD ))

314  15      CONTINUE

315          RRR1 = TMP/( SPDIAM * SQRT( ZNM2 ) )

316          IF (RRR1.LE.MAXGROWTH2) THEN

317             SIGMA = LSIGMA

318             SHIFT = SLEFT

319             GOTO 100

320          ENDIF

321       ELSE IF(INDX.EQ.2) THEN

322          TMP = ABS( WORK( N ) )

323          ZNM2 = ONE

324          PROD = ONE

325          OLDP = ONE

326          DO 16 I = N-1, 1, -1

327             IF( PROD .LE. EPS ) THEN

328                PROD = ((WORK(I+1)*LPLUS(I+1))/(WORK(I)*LPLUS(I)))*OLDP

329             ELSE

330                PROD = PROD*ABS(LPLUS(I))

331             END IF

332             OLDP = PROD

333             ZNM2 = ZNM2 + PROD**2

334             TMP = MAX( TMP, ABS( WORK( I ) * PROD ))

335  16      CONTINUE

336          RRR2 = TMP/( SPDIAM * SQRT( ZNM2 ) )

337          IF (RRR2.LE.MAXGROWTH2) THEN

338             SIGMA = RSIGMA

339             SHIFT = SRIGHT

340             GOTO 100

341          ENDIF

342       END IF

343       ENDIF

344 

345  50   CONTINUE

346 

347       IF (KTRY.LT.KTRYMAX) THEN

348 *        If we are here, both shifts failed also the RRR test.

349 *        Back off to the outside

350          LSIGMA = MAX( LSIGMA - LDELTA,

351      $     LSIGMA - LDMAX)

352          RSIGMA = MIN( RSIGMA + RDELTA,

353      $     RSIGMA + RDMAX )

354          LDELTA = TWO * LDELTA

355          RDELTA = TWO * RDELTA

356          KTRY = KTRY + 1

357          GOTO 5

358       ELSE

359 *        None of the representations investigated satisfied our

360 *        criteria. Take the best one we found.

361          IF((SMLGROWTH.LT.FAIL).OR.NOFAIL) THEN

362             LSIGMA = BESTSHIFT

363             RSIGMA = BESTSHIFT

364             FORCER = .TRUE.

365             GOTO 5

366          ELSE

367             INFO = 1

368             RETURN

369          ENDIF

370       END IF

371 

372  100  CONTINUE

373       IF (SHIFT.EQ.SLEFT) THEN

374       ELSEIF (SHIFT.EQ.SRIGHT) THEN

375 *        store new L and D back into DPLUS, LPLUS

376          CALL DCOPY( N, WORK, 1, DPLUS, 1 )

377          CALL DCOPY( N-1, WORK(N+1), 1, LPLUS, 1 )

378       ENDIF

379 

380       RETURN

381 *

382 *     End of DLARRF

383 *

384       END