dlaed2.f (flens/lapack/interface/ref

  1       SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,

  2      $                   Q2, INDX, INDXC, INDXP, COLTYP, INFO )

  3 *

  4 *  -- LAPACK routine (version 3.2) --

  5 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --

  6 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--

  7 *     November 2006

  8 *

  9 *     .. Scalar Arguments ..

 10       INTEGER            INFO, K, LDQ, N, N1

 11       DOUBLE PRECISION   RHO

 12 *     ..

 13 *     .. Array Arguments ..

 14       INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),

 15      $                   INDXQ( * )

 16       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),

 17      $                   W( * ), Z( * )

 18 *     ..

 19 *

 20 *  Purpose

 21 *  =======

 22 *

 23 *  DLAED2 merges the two sets of eigenvalues together into a single

 24 *  sorted set.  Then it tries to deflate the size of the problem.

 25 *  There are two ways in which deflation can occur:  when two or more

 26 *  eigenvalues are close together or if there is a tiny entry in the

 27 *  Z vector.  For each such occurrence the order of the related secular

 28 *  equation problem is reduced by one.

 29 *

 30 *  Arguments

 31 *  =========

 32 *

 33 *  K      (output) INTEGER

 34 *         The number of non-deflated eigenvalues, and the order of the

 35 *         related secular equation. 0 <= K <=N.

 36 *

 37 *  N      (input) INTEGER

 38 *         The dimension of the symmetric tridiagonal matrix.  N >= 0.

 39 *

 40 *  N1     (input) INTEGER

 41 *         The location of the last eigenvalue in the leading sub-matrix.

 42 *         min(1,N) <= N1 <= N/2.

 43 *

 44 *  D      (input/output) DOUBLE PRECISION array, dimension (N)

 45 *         On entry, D contains the eigenvalues of the two submatrices to

 46 *         be combined.

 47 *         On exit, D contains the trailing (N-K) updated eigenvalues

 48 *         (those which were deflated) sorted into increasing order.

 49 *

 50 *  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N)

 51 *         On entry, Q contains the eigenvectors of two submatrices in

 52 *         the two square blocks with corners at (1,1), (N1,N1)

 53 *         and (N1+1, N1+1), (N,N).

 54 *         On exit, Q contains the trailing (N-K) updated eigenvectors

 55 *         (those which were deflated) in its last N-K columns.

 56 *

 57 *  LDQ    (input) INTEGER

 58 *         The leading dimension of the array Q.  LDQ >= max(1,N).

 59 *

 60 *  INDXQ  (input/output) INTEGER array, dimension (N)

 61 *         The permutation which separately sorts the two sub-problems

 62 *         in D into ascending order.  Note that elements in the second

 63 *         half of this permutation must first have N1 added to their

 64 *         values. Destroyed on exit.

 65 *

 66 *  RHO    (input/output) DOUBLE PRECISION

 67 *         On entry, the off-diagonal element associated with the rank-1

 68 *         cut which originally split the two submatrices which are now

 69 *         being recombined.

 70 *         On exit, RHO has been modified to the value required by

 71 *         DLAED3.

 72 *

 73 *  Z      (input) DOUBLE PRECISION array, dimension (N)

 74 *         On entry, Z contains the updating vector (the last

 75 *         row of the first sub-eigenvector matrix and the first row of

 76 *         the second sub-eigenvector matrix).

 77 *         On exit, the contents of Z have been destroyed by the updating

 78 *         process.

 79 *

 80 *  DLAMDA (output) DOUBLE PRECISION array, dimension (N)

 81 *         A copy of the first K eigenvalues which will be used by

 82 *         DLAED3 to form the secular equation.

 83 *

 84 *  W      (output) DOUBLE PRECISION array, dimension (N)

 85 *         The first k values of the final deflation-altered z-vector

 86 *         which will be passed to DLAED3.

 87 *

 88 *  Q2     (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2)

 89 *         A copy of the first K eigenvectors which will be used by

 90 *         DLAED3 in a matrix multiply (DGEMM) to solve for the new

 91 *         eigenvectors.

 92 *

 93 *  INDX   (workspace) INTEGER array, dimension (N)

 94 *         The permutation used to sort the contents of DLAMDA into

 95 *         ascending order.

 96 *

 97 *  INDXC  (output) INTEGER array, dimension (N)

 98 *         The permutation used to arrange the columns of the deflated

 99 *         Q matrix into three groups:  the first group contains non-zero

100 *         elements only at and above N1, the second contains

101 *         non-zero elements only below N1, and the third is dense.

102 *

103 *  INDXP  (workspace) INTEGER array, dimension (N)

104 *         The permutation used to place deflated values of D at the end

105 *         of the array.  INDXP(1:K) points to the nondeflated D-values

106 *         and INDXP(K+1:N) points to the deflated eigenvalues.

107 *

108 *  COLTYP (workspace/output) INTEGER array, dimension (N)

109 *         During execution, a label which will indicate which of the

110 *         following types a column in the Q2 matrix is:

111 *         1 : non-zero in the upper half only;

112 *         2 : dense;

113 *         3 : non-zero in the lower half only;

114 *         4 : deflated.

115 *         On exit, COLTYP(i) is the number of columns of type i,

116 *         for i=1 to 4 only.

117 *

118 *  INFO   (output) INTEGER

119 *          = 0:  successful exit.

120 *          < 0:  if INFO = -i, the i-th argument had an illegal value.

121 *

122 *  Further Details

123 *  ===============

124 *

125 *  Based on contributions by

126 *     Jeff Rutter, Computer Science Division, University of California

127 *     at Berkeley, USA

128 *  Modified by Francoise Tisseur, University of Tennessee.

129 *

130 *  =====================================================================

131 *

132 *     .. Parameters ..

133       DOUBLE PRECISION   MONE, ZERO, ONE, TWO, EIGHT

134       PARAMETER          ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,

135      $                   TWO = 2.0D0, EIGHT = 8.0D0 )

136 *     ..

137 *     .. Local Arrays ..

138       INTEGER            CTOT( 4 ), PSM( 4 )

139 *     ..

140 *     .. Local Scalars ..

141       INTEGER            CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1,

142      $                   N2, NJ, PJ

143       DOUBLE PRECISION   C, EPS, S, T, TAU, TOL

144 *     ..

145 *     .. External Functions ..

146       INTEGER            IDAMAX

147       DOUBLE PRECISION   DLAMCH, DLAPY2

148       EXTERNAL           IDAMAX, DLAMCH, DLAPY2

149 *     ..

150 *     .. External Subroutines ..

151       EXTERNAL           DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA

152 *     ..

153 *     .. Intrinsic Functions ..

154       INTRINSIC          ABS, MAX, MIN, SQRT

155 *     ..

156 *     .. Executable Statements ..

157 *

158 *     Test the input parameters.

159 *

160       INFO = 0

161 *

162       IF( N.LT.0 ) THEN

163          INFO = -2

164       ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN

165          INFO = -6

166       ELSE IF( MIN( 1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN

167          INFO = -3

168       END IF

169       IF( INFO.NE.0 ) THEN

170          CALL XERBLA( 'DLAED2', -INFO )

171          RETURN

172       END IF

173 *

174 *     Quick return if possible

175 *

176       IF( N.EQ.0 )

177      $   RETURN

178 *

179       N2 = N - N1

180       N1P1 = N1 + 1

181 *

182       IF( RHO.LT.ZERO ) THEN

183          CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )

184       END IF

185 *

186 *     Normalize z so that norm(z) = 1.  Since z is the concatenation of

187 *     two normalized vectors, norm2(z) = sqrt(2).

188 *

189       T = ONE / SQRT( TWO )

190       CALL DSCAL( N, T, Z, 1 )

191 *

192 *     RHO = ABS( norm(z)**2 * RHO )

193 *

194       RHO = ABS( TWO*RHO )

195 *

196 *     Sort the eigenvalues into increasing order

197 *

198       DO 10 I = N1P1, N

199          INDXQ( I ) = INDXQ( I ) + N1

200    10 CONTINUE

201 *

202 *     re-integrate the deflated parts from the last pass

203 *

204       DO 20 I = 1, N

205          DLAMDA( I ) = D( INDXQ( I ) )

206    20 CONTINUE

207       CALL DLAMRG( N1, N2, DLAMDA, 1, 1, INDXC )

208       DO 30 I = 1, N

209          INDX( I ) = INDXQ( INDXC( I ) )

210    30 CONTINUE

211 *

212 *     Calculate the allowable deflation tolerance

213 *

214       IMAX = IDAMAX( N, Z, 1 )

215       JMAX = IDAMAX( N, D, 1 )

216       EPS = DLAMCH( 'Epsilon' )

217       TOL = EIGHT*EPS*MAX( ABS( D( JMAX ) ), ABS( Z( IMAX ) ) )

218 *

219 *     If the rank-1 modifier is small enough, no more needs to be done

220 *     except to reorganize Q so that its columns correspond with the

221 *     elements in D.

222 *

223       IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN

224          K = 0

225          IQ2 = 1

226          DO 40 J = 1, N

227             I = INDX( J )

228             CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )

229             DLAMDA( J ) = D( I )

230             IQ2 = IQ2 + N

231    40    CONTINUE

232          CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ )

233          CALL DCOPY( N, DLAMDA, 1, D, 1 )

234          GO TO 190

235       END IF

236 *

237 *     If there are multiple eigenvalues then the problem deflates.  Here

238 *     the number of equal eigenvalues are found.  As each equal

239 *     eigenvalue is found, an elementary reflector is computed to rotate

240 *     the corresponding eigensubspace so that the corresponding

241 *     components of Z are zero in this new basis.

242 *

243       DO 50 I = 1, N1

244          COLTYP( I ) = 1

245    50 CONTINUE

246       DO 60 I = N1P1, N

247          COLTYP( I ) = 3

248    60 CONTINUE

249 *

250 *

251       K = 0

252       K2 = N + 1

253       DO 70 J = 1, N

254          NJ = INDX( J )

255          IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN

256 *

257 *           Deflate due to small z component.

258 *

259             K2 = K2 - 1

260             COLTYP( NJ ) = 4

261             INDXP( K2 ) = NJ

262             IF( J.EQ.N )

263      $         GO TO 100

264          ELSE

265             PJ = NJ

266             GO TO 80

267          END IF

268    70 CONTINUE

269    80 CONTINUE

270       J = J + 1

271       NJ = INDX( J )

272       IF( J.GT.N )

273      $   GO TO 100

274       IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN

275 *

276 *        Deflate due to small z component.

277 *

278          K2 = K2 - 1

279          COLTYP( NJ ) = 4

280          INDXP( K2 ) = NJ

281       ELSE

282 *

283 *        Check if eigenvalues are close enough to allow deflation.

284 *

285          S = Z( PJ )

286          C = Z( NJ )

287 *

288 *        Find sqrt(a**2+b**2) without overflow or

289 *        destructive underflow.

290 *

291          TAU = DLAPY2( C, S )

292          T = D( NJ ) - D( PJ )

293          C = C / TAU

294          S = -S / TAU

295          IF( ABS( T*C*S ).LE.TOL ) THEN

296 *

297 *           Deflation is possible.

298 *

299             Z( NJ ) = TAU

300             Z( PJ ) = ZERO

301             IF( COLTYP( NJ ).NE.COLTYP( PJ ) )

302      $         COLTYP( NJ ) = 2

303             COLTYP( PJ ) = 4

304             CALL DROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S )

305             T = D( PJ )*C**2 + D( NJ )*S**2

306             D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2

307             D( PJ ) = T

308             K2 = K2 - 1

309             I = 1

310    90       CONTINUE

311             IF( K2+I.LE.N ) THEN

312                IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN

313                   INDXP( K2+I-1 ) = INDXP( K2+I )

314                   INDXP( K2+I ) = PJ

315                   I = I + 1

316                   GO TO 90

317                ELSE

318                   INDXP( K2+I-1 ) = PJ

319                END IF

320             ELSE

321                INDXP( K2+I-1 ) = PJ

322             END IF

323             PJ = NJ

324          ELSE

325             K = K + 1

326             DLAMDA( K ) = D( PJ )

327             W( K ) = Z( PJ )

328             INDXP( K ) = PJ

329             PJ = NJ

330          END IF

331       END IF

332       GO TO 80

333   100 CONTINUE

334 *

335 *     Record the last eigenvalue.

336 *

337       K = K + 1

338       DLAMDA( K ) = D( PJ )

339       W( K ) = Z( PJ )

340       INDXP( K ) = PJ

341 *

342 *     Count up the total number of the various types of columns, then

343 *     form a permutation which positions the four column types into

344 *     four uniform groups (although one or more of these groups may be

345 *     empty).

346 *

347       DO 110 J = 1, 4

348          CTOT( J ) = 0

349   110 CONTINUE

350       DO 120 J = 1, N

351          CT = COLTYP( J )

352          CTOT( CT ) = CTOT( CT ) + 1

353   120 CONTINUE

354 *

355 *     PSM(*) = Position in SubMatrix (of types 1 through 4)

356 *

357       PSM( 1 ) = 1

358       PSM( 2 ) = 1 + CTOT( 1 )

359       PSM( 3 ) = PSM( 2 ) + CTOT( 2 )

360       PSM( 4 ) = PSM( 3 ) + CTOT( 3 )

361       K = N - CTOT( 4 )

362 *

363 *     Fill out the INDXC array so that the permutation which it induces

364 *     will place all type-1 columns first, all type-2 columns next,

365 *     then all type-3's, and finally all type-4's.

366 *

367       DO 130 J = 1, N

368          JS = INDXP( J )

369          CT = COLTYP( JS )

370          INDX( PSM( CT ) ) = JS

371          INDXC( PSM( CT ) ) = J

372          PSM( CT ) = PSM( CT ) + 1

373   130 CONTINUE

374 *

375 *     Sort the eigenvalues and corresponding eigenvectors into DLAMDA

376 *     and Q2 respectively.  The eigenvalues/vectors which were not

377 *     deflated go into the first K slots of DLAMDA and Q2 respectively,

378 *     while those which were deflated go into the last N - K slots.

379 *

380       I = 1

381       IQ1 = 1

382       IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1

383       DO 140 J = 1, CTOT( 1 )

384          JS = INDX( I )

385          CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )

386          Z( I ) = D( JS )

387          I = I + 1

388          IQ1 = IQ1 + N1

389   140 CONTINUE

390 *

391       DO 150 J = 1, CTOT( 2 )

392          JS = INDX( I )

393          CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )

394          CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )

395          Z( I ) = D( JS )

396          I = I + 1

397          IQ1 = IQ1 + N1

398          IQ2 = IQ2 + N2

399   150 CONTINUE

400 *

401       DO 160 J = 1, CTOT( 3 )

402          JS = INDX( I )

403          CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )

404          Z( I ) = D( JS )

405          I = I + 1

406          IQ2 = IQ2 + N2

407   160 CONTINUE

408 *

409       IQ1 = IQ2

410       DO 170 J = 1, CTOT( 4 )

411          JS = INDX( I )

412          CALL DCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 )

413          IQ2 = IQ2 + N

414          Z( I ) = D( JS )

415          I = I + 1

416   170 CONTINUE

417 *

418 *     The deflated eigenvalues and their corresponding vectors go back

419 *     into the last N - K slots of D and Q respectively.

420 *

421       CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N, Q( 1, K+1 ), LDQ )

422       CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )

423 *

424 *     Copy CTOT into COLTYP for referencing in DLAED3.

425 *

426       DO 180 J = 1, 4

427          COLTYP( J ) = CTOT( J )

428   180 CONTINUE

429 *

430   190 CONTINUE

431       RETURN

432 *

433 *     End of DLAED2

434 *

435       END