dlaed3.f (flens/lapack/interface/ref

  1       SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,

  2      $                   CTOT, W, S, INFO )

  3 *

  4 *  -- LAPACK routine (version 3.2) --

  5 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --

  6 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--

  7 *     November 2006

  8 *

  9 *     .. Scalar Arguments ..

 10       INTEGER            INFO, K, LDQ, N, N1

 11       DOUBLE PRECISION   RHO

 12 *     ..

 13 *     .. Array Arguments ..

 14       INTEGER            CTOT( * ), INDX( * )

 15       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),

 16      $                   S( * ), W( * )

 17 *     ..

 18 *

 19 *  Purpose

 20 *  =======

 21 *

 22 *  DLAED3 finds the roots of the secular equation, as defined by the

 23 *  values in D, W, and RHO, between 1 and K.  It makes the

 24 *  appropriate calls to DLAED4 and then updates the eigenvectors by

 25 *  multiplying the matrix of eigenvectors of the pair of eigensystems

 26 *  being combined by the matrix of eigenvectors of the K-by-K system

 27 *  which is solved here.

 28 *

 29 *  This code makes very mild assumptions about floating point

 30 *  arithmetic. It will work on machines with a guard digit in

 31 *  add/subtract, or on those binary machines without guard digits

 32 *  which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.

 33 *  It could conceivably fail on hexadecimal or decimal machines

 34 *  without guard digits, but we know of none.

 35 *

 36 *  Arguments

 37 *  =========

 38 *

 39 *  K       (input) INTEGER

 40 *          The number of terms in the rational function to be solved by

 41 *          DLAED4.  K >= 0.

 42 *

 43 *  N       (input) INTEGER

 44 *          The number of rows and columns in the Q matrix.

 45 *          N >= K (deflation may result in N>K).

 46 *

 47 *  N1      (input) INTEGER

 48 *          The location of the last eigenvalue in the leading submatrix.

 49 *          min(1,N) <= N1 <= N/2.

 50 *

 51 *  D       (output) DOUBLE PRECISION array, dimension (N)

 52 *          D(I) contains the updated eigenvalues for

 53 *          1 <= I <= K.

 54 *

 55 *  Q       (output) DOUBLE PRECISION array, dimension (LDQ,N)

 56 *          Initially the first K columns are used as workspace.

 57 *          On output the columns 1 to K contain

 58 *          the updated eigenvectors.

 59 *

 60 *  LDQ     (input) INTEGER

 61 *          The leading dimension of the array Q.  LDQ >= max(1,N).

 62 *

 63 *  RHO     (input) DOUBLE PRECISION

 64 *          The value of the parameter in the rank one update equation.

 65 *          RHO >= 0 required.

 66 *

 67 *  DLAMDA  (input/output) DOUBLE PRECISION array, dimension (K)

 68 *          The first K elements of this array contain the old roots

 69 *          of the deflated updating problem.  These are the poles

 70 *          of the secular equation. May be changed on output by

 71 *          having lowest order bit set to zero on Cray X-MP, Cray Y-MP,

 72 *          Cray-2, or Cray C-90, as described above.

 73 *

 74 *  Q2      (input) DOUBLE PRECISION array, dimension (LDQ2, N)

 75 *          The first K columns of this matrix contain the non-deflated

 76 *          eigenvectors for the split problem.

 77 *

 78 *  INDX    (input) INTEGER array, dimension (N)

 79 *          The permutation used to arrange the columns of the deflated

 80 *          Q matrix into three groups (see DLAED2).

 81 *          The rows of the eigenvectors found by DLAED4 must be likewise

 82 *          permuted before the matrix multiply can take place.

 83 *

 84 *  CTOT    (input) INTEGER array, dimension (4)

 85 *          A count of the total number of the various types of columns

 86 *          in Q, as described in INDX.  The fourth column type is any

 87 *          column which has been deflated.

 88 *

 89 *  W       (input/output) DOUBLE PRECISION array, dimension (K)

 90 *          The first K elements of this array contain the components

 91 *          of the deflation-adjusted updating vector. Destroyed on

 92 *          output.

 93 *

 94 *  S       (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K

 95 *          Will contain the eigenvectors of the repaired matrix which

 96 *          will be multiplied by the previously accumulated eigenvectors

 97 *          to update the system.

 98 *

 99 *  LDS     (input) INTEGER

100 *          The leading dimension of S.  LDS >= max(1,K).

101 *

102 *  INFO    (output) INTEGER

103 *          = 0:  successful exit.

104 *          < 0:  if INFO = -i, the i-th argument had an illegal value.

105 *          > 0:  if INFO = 1, an eigenvalue did not converge

106 *

107 *  Further Details

108 *  ===============

109 *

110 *  Based on contributions by

111 *     Jeff Rutter, Computer Science Division, University of California

112 *     at Berkeley, USA

113 *  Modified by Francoise Tisseur, University of Tennessee.

114 *

115 *  =====================================================================

116 *

117 *     .. Parameters ..

118       DOUBLE PRECISION   ONE, ZERO

119       PARAMETER          ( ONE = 1.0D0, ZERO = 0.0D0 )

120 *     ..

121 *     .. Local Scalars ..

122       INTEGER            I, II, IQ2, J, N12, N2, N23

123       DOUBLE PRECISION   TEMP

124 *     ..

125 *     .. External Functions ..

126       DOUBLE PRECISION   DLAMC3, DNRM2

127       EXTERNAL           DLAMC3, DNRM2

128 *     ..

129 *     .. External Subroutines ..

130       EXTERNAL           DCOPY, DGEMM, DLACPY, DLAED4, DLASET, XERBLA

131 *     ..

132 *     .. Intrinsic Functions ..

133       INTRINSIC          MAX, SIGN, SQRT

134 *     ..

135 *     .. Executable Statements ..

136 *

137 *     Test the input parameters.

138 *

139       INFO = 0

140 *

141       IF( K.LT.0 ) THEN

142          INFO = -1

143       ELSE IF( N.LT.K ) THEN

144          INFO = -2

145       ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN

146          INFO = -6

147       END IF

148       IF( INFO.NE.0 ) THEN

149          CALL XERBLA( 'DLAED3', -INFO )

150          RETURN

151       END IF

152 *

153 *     Quick return if possible

154 *

155       IF( K.EQ.0 )

156      $   RETURN

157 *

158 *     Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can

159 *     be computed with high relative accuracy (barring over/underflow).

160 *     This is a problem on machines without a guard digit in

161 *     add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).

162 *     The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),

163 *     which on any of these machines zeros out the bottommost

164 *     bit of DLAMDA(I) if it is 1; this makes the subsequent

165 *     subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation

166 *     occurs. On binary machines with a guard digit (almost all

167 *     machines) it does not change DLAMDA(I) at all. On hexadecimal

168 *     and decimal machines with a guard digit, it slightly

169 *     changes the bottommost bits of DLAMDA(I). It does not account

170 *     for hexadecimal or decimal machines without guard digits

171 *     (we know of none). We use a subroutine call to compute

172 *     2*DLAMBDA(I) to prevent optimizing compilers from eliminating

173 *     this code.

174 *

175       DO 10 I = 1, K

176          DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )

177    10 CONTINUE

178 *

179       DO 20 J = 1, K

180          CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO )

181 *

182 *        If the zero finder fails, the computation is terminated.

183 *

184          IF( INFO.NE.0 )

185      $      GO TO 120

186    20 CONTINUE

187 *

188       IF( K.EQ.1 )

189      $   GO TO 110

190       IF( K.EQ.2 ) THEN

191          DO 30 J = 1, K

192             W( 1 ) = Q( 1, J )

193             W( 2 ) = Q( 2, J )

194             II = INDX( 1 )

195             Q( 1, J ) = W( II )

196             II = INDX( 2 )

197             Q( 2, J ) = W( II )

198    30    CONTINUE

199          GO TO 110

200       END IF

201 *

202 *     Compute updated W.

203 *

204       CALL DCOPY( K, W, 1, S, 1 )

205 *

206 *     Initialize W(I) = Q(I,I)

207 *

208       CALL DCOPY( K, Q, LDQ+1, W, 1 )

209       DO 60 J = 1, K

210          DO 40 I = 1, J - 1

211             W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )

212    40    CONTINUE

213          DO 50 I = J + 1, K

214             W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )

215    50    CONTINUE

216    60 CONTINUE

217       DO 70 I = 1, K

218          W( I ) = SIGN( SQRT( -W( I ) ), S( I ) )

219    70 CONTINUE

220 *

221 *     Compute eigenvectors of the modified rank-1 modification.

222 *

223       DO 100 J = 1, K

224          DO 80 I = 1, K

225             S( I ) = W( I ) / Q( I, J )

226    80    CONTINUE

227          TEMP = DNRM2( K, S, 1 )

228          DO 90 I = 1, K

229             II = INDX( I )

230             Q( I, J ) = S( II ) / TEMP

231    90    CONTINUE

232   100 CONTINUE

233 *

234 *     Compute the updated eigenvectors.

235 *

236   110 CONTINUE

237 *

238       N2 = N - N1

239       N12 = CTOT( 1 ) + CTOT( 2 )

240       N23 = CTOT( 2 ) + CTOT( 3 )

241 *

242       CALL DLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 )

243       IQ2 = N1*N12 + 1

244       IF( N23.NE.0 ) THEN

245          CALL DGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23,

246      $               ZERO, Q( N1+1, 1 ), LDQ )

247       ELSE

248          CALL DLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ )

249       END IF

250 *

251       CALL DLACPY( 'A', N12, K, Q, LDQ, S, N12 )

252       IF( N12.NE.0 ) THEN

253          CALL DGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q,

254      $               LDQ )

255       ELSE

256          CALL DLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ )

257       END IF

258 *

259 *

260   120 CONTINUE

261       RETURN

262 *

263 *     End of DLAED3

264 *

265       END