1 SUBROUTINE DLAED3( K, N, N1, D, Q, LDQ, RHO, DLAMDA, Q2, INDX,
2 $ CTOT, W, S, INFO )
3 *
4 * -- LAPACK routine (version 3.2) --
5 * -- LAPACK is a software package provided by Univ. of Tennessee, --
6 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
7 * November 2006
8 *
9 * .. Scalar Arguments ..
10 INTEGER INFO, K, LDQ, N, N1
11 DOUBLE PRECISION RHO
12 * ..
13 * .. Array Arguments ..
14 INTEGER CTOT( * ), INDX( * )
15 DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
16 $ S( * ), W( * )
17 * ..
18 *
19 * Purpose
20 * =======
21 *
22 * DLAED3 finds the roots of the secular equation, as defined by the
23 * values in D, W, and RHO, between 1 and K. It makes the
24 * appropriate calls to DLAED4 and then updates the eigenvectors by
25 * multiplying the matrix of eigenvectors of the pair of eigensystems
26 * being combined by the matrix of eigenvectors of the K-by-K system
27 * which is solved here.
28 *
29 * This code makes very mild assumptions about floating point
30 * arithmetic. It will work on machines with a guard digit in
31 * add/subtract, or on those binary machines without guard digits
32 * which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
33 * It could conceivably fail on hexadecimal or decimal machines
34 * without guard digits, but we know of none.
35 *
36 * Arguments
37 * =========
38 *
39 * K (input) INTEGER
40 * The number of terms in the rational function to be solved by
41 * DLAED4. K >= 0.
42 *
43 * N (input) INTEGER
44 * The number of rows and columns in the Q matrix.
45 * N >= K (deflation may result in N>K).
46 *
47 * N1 (input) INTEGER
48 * The location of the last eigenvalue in the leading submatrix.
49 * min(1,N) <= N1 <= N/2.
50 *
51 * D (output) DOUBLE PRECISION array, dimension (N)
52 * D(I) contains the updated eigenvalues for
53 * 1 <= I <= K.
54 *
55 * Q (output) DOUBLE PRECISION array, dimension (LDQ,N)
56 * Initially the first K columns are used as workspace.
57 * On output the columns 1 to K contain
58 * the updated eigenvectors.
59 *
60 * LDQ (input) INTEGER
61 * The leading dimension of the array Q. LDQ >= max(1,N).
62 *
63 * RHO (input) DOUBLE PRECISION
64 * The value of the parameter in the rank one update equation.
65 * RHO >= 0 required.
66 *
67 * DLAMDA (input/output) DOUBLE PRECISION array, dimension (K)
68 * The first K elements of this array contain the old roots
69 * of the deflated updating problem. These are the poles
70 * of the secular equation. May be changed on output by
71 * having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
72 * Cray-2, or Cray C-90, as described above.
73 *
74 * Q2 (input) DOUBLE PRECISION array, dimension (LDQ2, N)
75 * The first K columns of this matrix contain the non-deflated
76 * eigenvectors for the split problem.
77 *
78 * INDX (input) INTEGER array, dimension (N)
79 * The permutation used to arrange the columns of the deflated
80 * Q matrix into three groups (see DLAED2).
81 * The rows of the eigenvectors found by DLAED4 must be likewise
82 * permuted before the matrix multiply can take place.
83 *
84 * CTOT (input) INTEGER array, dimension (4)
85 * A count of the total number of the various types of columns
86 * in Q, as described in INDX. The fourth column type is any
87 * column which has been deflated.
88 *
89 * W (input/output) DOUBLE PRECISION array, dimension (K)
90 * The first K elements of this array contain the components
91 * of the deflation-adjusted updating vector. Destroyed on
92 * output.
93 *
94 * S (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K
95 * Will contain the eigenvectors of the repaired matrix which
96 * will be multiplied by the previously accumulated eigenvectors
97 * to update the system.
98 *
99 * LDS (input) INTEGER
100 * The leading dimension of S. LDS >= max(1,K).
101 *
102 * INFO (output) INTEGER
103 * = 0: successful exit.
104 * < 0: if INFO = -i, the i-th argument had an illegal value.
105 * > 0: if INFO = 1, an eigenvalue did not converge
106 *
107 * Further Details
108 * ===============
109 *
110 * Based on contributions by
111 * Jeff Rutter, Computer Science Division, University of California
112 * at Berkeley, USA
113 * Modified by Francoise Tisseur, University of Tennessee.
114 *
115 * =====================================================================
116 *
117 * .. Parameters ..
118 DOUBLE PRECISION ONE, ZERO
119 PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 )
120 * ..
121 * .. Local Scalars ..
122 INTEGER I, II, IQ2, J, N12, N2, N23
123 DOUBLE PRECISION TEMP
124 * ..
125 * .. External Functions ..
126 DOUBLE PRECISION DLAMC3, DNRM2
127 EXTERNAL DLAMC3, DNRM2
128 * ..
129 * .. External Subroutines ..
130 EXTERNAL DCOPY, DGEMM, DLACPY, DLAED4, DLASET, XERBLA
131 * ..
132 * .. Intrinsic Functions ..
133 INTRINSIC MAX, SIGN, SQRT
134 * ..
135 * .. Executable Statements ..
136 *
137 * Test the input parameters.
138 *
139 INFO = 0
140 *
141 IF( K.LT.0 ) THEN
142 INFO = -1
143 ELSE IF( N.LT.K ) THEN
144 INFO = -2
145 ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
146 INFO = -6
147 END IF
148 IF( INFO.NE.0 ) THEN
149 CALL XERBLA( 'DLAED3', -INFO )
150 RETURN
151 END IF
152 *
153 * Quick return if possible
154 *
155 IF( K.EQ.0 )
156 $ RETURN
157 *
158 * Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
159 * be computed with high relative accuracy (barring over/underflow).
160 * This is a problem on machines without a guard digit in
161 * add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
162 * The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
163 * which on any of these machines zeros out the bottommost
164 * bit of DLAMDA(I) if it is 1; this makes the subsequent
165 * subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
166 * occurs. On binary machines with a guard digit (almost all
167 * machines) it does not change DLAMDA(I) at all. On hexadecimal
168 * and decimal machines with a guard digit, it slightly
169 * changes the bottommost bits of DLAMDA(I). It does not account
170 * for hexadecimal or decimal machines without guard digits
171 * (we know of none). We use a subroutine call to compute
172 * 2*DLAMBDA(I) to prevent optimizing compilers from eliminating
173 * this code.
174 *
175 DO 10 I = 1, K
176 DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
177 10 CONTINUE
178 *
179 DO 20 J = 1, K
180 CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO )
181 *
182 * If the zero finder fails, the computation is terminated.
183 *
184 IF( INFO.NE.0 )
185 $ GO TO 120
186 20 CONTINUE
187 *
188 IF( K.EQ.1 )
189 $ GO TO 110
190 IF( K.EQ.2 ) THEN
191 DO 30 J = 1, K
192 W( 1 ) = Q( 1, J )
193 W( 2 ) = Q( 2, J )
194 II = INDX( 1 )
195 Q( 1, J ) = W( II )
196 II = INDX( 2 )
197 Q( 2, J ) = W( II )
198 30 CONTINUE
199 GO TO 110
200 END IF
201 *
202 * Compute updated W.
203 *
204 CALL DCOPY( K, W, 1, S, 1 )
205 *
206 * Initialize W(I) = Q(I,I)
207 *
208 CALL DCOPY( K, Q, LDQ+1, W, 1 )
209 DO 60 J = 1, K
210 DO 40 I = 1, J - 1
211 W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
212 40 CONTINUE
213 DO 50 I = J + 1, K
214 W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
215 50 CONTINUE
216 60 CONTINUE
217 DO 70 I = 1, K
218 W( I ) = SIGN( SQRT( -W( I ) ), S( I ) )
219 70 CONTINUE
220 *
221 * Compute eigenvectors of the modified rank-1 modification.
222 *
223 DO 100 J = 1, K
224 DO 80 I = 1, K
225 S( I ) = W( I ) / Q( I, J )
226 80 CONTINUE
227 TEMP = DNRM2( K, S, 1 )
228 DO 90 I = 1, K
229 II = INDX( I )
230 Q( I, J ) = S( II ) / TEMP
231 90 CONTINUE
232 100 CONTINUE
233 *
234 * Compute the updated eigenvectors.
235 *
236 110 CONTINUE
237 *
238 N2 = N - N1
239 N12 = CTOT( 1 ) + CTOT( 2 )
240 N23 = CTOT( 2 ) + CTOT( 3 )
241 *
242 CALL DLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 )
243 IQ2 = N1*N12 + 1
244 IF( N23.NE.0 ) THEN
245 CALL DGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23,
246 $ ZERO, Q( N1+1, 1 ), LDQ )
247 ELSE
248 CALL DLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ )
249 END IF
250 *
251 CALL DLACPY( 'A', N12, K, Q, LDQ, S, N12 )
252 IF( N12.NE.0 ) THEN
253 CALL DGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q,
254 $ LDQ )
255 ELSE
256 CALL DLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ )
257 END IF
258 *
259 *
260 120 CONTINUE
261 RETURN
262 *
263 * End of DLAED3
264 *
265 END
2 $ CTOT, W, S, INFO )
3 *
4 * -- LAPACK routine (version 3.2) --
5 * -- LAPACK is a software package provided by Univ. of Tennessee, --
6 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
7 * November 2006
8 *
9 * .. Scalar Arguments ..
10 INTEGER INFO, K, LDQ, N, N1
11 DOUBLE PRECISION RHO
12 * ..
13 * .. Array Arguments ..
14 INTEGER CTOT( * ), INDX( * )
15 DOUBLE PRECISION D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
16 $ S( * ), W( * )
17 * ..
18 *
19 * Purpose
20 * =======
21 *
22 * DLAED3 finds the roots of the secular equation, as defined by the
23 * values in D, W, and RHO, between 1 and K. It makes the
24 * appropriate calls to DLAED4 and then updates the eigenvectors by
25 * multiplying the matrix of eigenvectors of the pair of eigensystems
26 * being combined by the matrix of eigenvectors of the K-by-K system
27 * which is solved here.
28 *
29 * This code makes very mild assumptions about floating point
30 * arithmetic. It will work on machines with a guard digit in
31 * add/subtract, or on those binary machines without guard digits
32 * which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or Cray-2.
33 * It could conceivably fail on hexadecimal or decimal machines
34 * without guard digits, but we know of none.
35 *
36 * Arguments
37 * =========
38 *
39 * K (input) INTEGER
40 * The number of terms in the rational function to be solved by
41 * DLAED4. K >= 0.
42 *
43 * N (input) INTEGER
44 * The number of rows and columns in the Q matrix.
45 * N >= K (deflation may result in N>K).
46 *
47 * N1 (input) INTEGER
48 * The location of the last eigenvalue in the leading submatrix.
49 * min(1,N) <= N1 <= N/2.
50 *
51 * D (output) DOUBLE PRECISION array, dimension (N)
52 * D(I) contains the updated eigenvalues for
53 * 1 <= I <= K.
54 *
55 * Q (output) DOUBLE PRECISION array, dimension (LDQ,N)
56 * Initially the first K columns are used as workspace.
57 * On output the columns 1 to K contain
58 * the updated eigenvectors.
59 *
60 * LDQ (input) INTEGER
61 * The leading dimension of the array Q. LDQ >= max(1,N).
62 *
63 * RHO (input) DOUBLE PRECISION
64 * The value of the parameter in the rank one update equation.
65 * RHO >= 0 required.
66 *
67 * DLAMDA (input/output) DOUBLE PRECISION array, dimension (K)
68 * The first K elements of this array contain the old roots
69 * of the deflated updating problem. These are the poles
70 * of the secular equation. May be changed on output by
71 * having lowest order bit set to zero on Cray X-MP, Cray Y-MP,
72 * Cray-2, or Cray C-90, as described above.
73 *
74 * Q2 (input) DOUBLE PRECISION array, dimension (LDQ2, N)
75 * The first K columns of this matrix contain the non-deflated
76 * eigenvectors for the split problem.
77 *
78 * INDX (input) INTEGER array, dimension (N)
79 * The permutation used to arrange the columns of the deflated
80 * Q matrix into three groups (see DLAED2).
81 * The rows of the eigenvectors found by DLAED4 must be likewise
82 * permuted before the matrix multiply can take place.
83 *
84 * CTOT (input) INTEGER array, dimension (4)
85 * A count of the total number of the various types of columns
86 * in Q, as described in INDX. The fourth column type is any
87 * column which has been deflated.
88 *
89 * W (input/output) DOUBLE PRECISION array, dimension (K)
90 * The first K elements of this array contain the components
91 * of the deflation-adjusted updating vector. Destroyed on
92 * output.
93 *
94 * S (workspace) DOUBLE PRECISION array, dimension (N1 + 1)*K
95 * Will contain the eigenvectors of the repaired matrix which
96 * will be multiplied by the previously accumulated eigenvectors
97 * to update the system.
98 *
99 * LDS (input) INTEGER
100 * The leading dimension of S. LDS >= max(1,K).
101 *
102 * INFO (output) INTEGER
103 * = 0: successful exit.
104 * < 0: if INFO = -i, the i-th argument had an illegal value.
105 * > 0: if INFO = 1, an eigenvalue did not converge
106 *
107 * Further Details
108 * ===============
109 *
110 * Based on contributions by
111 * Jeff Rutter, Computer Science Division, University of California
112 * at Berkeley, USA
113 * Modified by Francoise Tisseur, University of Tennessee.
114 *
115 * =====================================================================
116 *
117 * .. Parameters ..
118 DOUBLE PRECISION ONE, ZERO
119 PARAMETER ( ONE = 1.0D0, ZERO = 0.0D0 )
120 * ..
121 * .. Local Scalars ..
122 INTEGER I, II, IQ2, J, N12, N2, N23
123 DOUBLE PRECISION TEMP
124 * ..
125 * .. External Functions ..
126 DOUBLE PRECISION DLAMC3, DNRM2
127 EXTERNAL DLAMC3, DNRM2
128 * ..
129 * .. External Subroutines ..
130 EXTERNAL DCOPY, DGEMM, DLACPY, DLAED4, DLASET, XERBLA
131 * ..
132 * .. Intrinsic Functions ..
133 INTRINSIC MAX, SIGN, SQRT
134 * ..
135 * .. Executable Statements ..
136 *
137 * Test the input parameters.
138 *
139 INFO = 0
140 *
141 IF( K.LT.0 ) THEN
142 INFO = -1
143 ELSE IF( N.LT.K ) THEN
144 INFO = -2
145 ELSE IF( LDQ.LT.MAX( 1, N ) ) THEN
146 INFO = -6
147 END IF
148 IF( INFO.NE.0 ) THEN
149 CALL XERBLA( 'DLAED3', -INFO )
150 RETURN
151 END IF
152 *
153 * Quick return if possible
154 *
155 IF( K.EQ.0 )
156 $ RETURN
157 *
158 * Modify values DLAMDA(i) to make sure all DLAMDA(i)-DLAMDA(j) can
159 * be computed with high relative accuracy (barring over/underflow).
160 * This is a problem on machines without a guard digit in
161 * add/subtract (Cray XMP, Cray YMP, Cray C 90 and Cray 2).
162 * The following code replaces DLAMDA(I) by 2*DLAMDA(I)-DLAMDA(I),
163 * which on any of these machines zeros out the bottommost
164 * bit of DLAMDA(I) if it is 1; this makes the subsequent
165 * subtractions DLAMDA(I)-DLAMDA(J) unproblematic when cancellation
166 * occurs. On binary machines with a guard digit (almost all
167 * machines) it does not change DLAMDA(I) at all. On hexadecimal
168 * and decimal machines with a guard digit, it slightly
169 * changes the bottommost bits of DLAMDA(I). It does not account
170 * for hexadecimal or decimal machines without guard digits
171 * (we know of none). We use a subroutine call to compute
172 * 2*DLAMBDA(I) to prevent optimizing compilers from eliminating
173 * this code.
174 *
175 DO 10 I = 1, K
176 DLAMDA( I ) = DLAMC3( DLAMDA( I ), DLAMDA( I ) ) - DLAMDA( I )
177 10 CONTINUE
178 *
179 DO 20 J = 1, K
180 CALL DLAED4( K, J, DLAMDA, W, Q( 1, J ), RHO, D( J ), INFO )
181 *
182 * If the zero finder fails, the computation is terminated.
183 *
184 IF( INFO.NE.0 )
185 $ GO TO 120
186 20 CONTINUE
187 *
188 IF( K.EQ.1 )
189 $ GO TO 110
190 IF( K.EQ.2 ) THEN
191 DO 30 J = 1, K
192 W( 1 ) = Q( 1, J )
193 W( 2 ) = Q( 2, J )
194 II = INDX( 1 )
195 Q( 1, J ) = W( II )
196 II = INDX( 2 )
197 Q( 2, J ) = W( II )
198 30 CONTINUE
199 GO TO 110
200 END IF
201 *
202 * Compute updated W.
203 *
204 CALL DCOPY( K, W, 1, S, 1 )
205 *
206 * Initialize W(I) = Q(I,I)
207 *
208 CALL DCOPY( K, Q, LDQ+1, W, 1 )
209 DO 60 J = 1, K
210 DO 40 I = 1, J - 1
211 W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
212 40 CONTINUE
213 DO 50 I = J + 1, K
214 W( I ) = W( I )*( Q( I, J ) / ( DLAMDA( I )-DLAMDA( J ) ) )
215 50 CONTINUE
216 60 CONTINUE
217 DO 70 I = 1, K
218 W( I ) = SIGN( SQRT( -W( I ) ), S( I ) )
219 70 CONTINUE
220 *
221 * Compute eigenvectors of the modified rank-1 modification.
222 *
223 DO 100 J = 1, K
224 DO 80 I = 1, K
225 S( I ) = W( I ) / Q( I, J )
226 80 CONTINUE
227 TEMP = DNRM2( K, S, 1 )
228 DO 90 I = 1, K
229 II = INDX( I )
230 Q( I, J ) = S( II ) / TEMP
231 90 CONTINUE
232 100 CONTINUE
233 *
234 * Compute the updated eigenvectors.
235 *
236 110 CONTINUE
237 *
238 N2 = N - N1
239 N12 = CTOT( 1 ) + CTOT( 2 )
240 N23 = CTOT( 2 ) + CTOT( 3 )
241 *
242 CALL DLACPY( 'A', N23, K, Q( CTOT( 1 )+1, 1 ), LDQ, S, N23 )
243 IQ2 = N1*N12 + 1
244 IF( N23.NE.0 ) THEN
245 CALL DGEMM( 'N', 'N', N2, K, N23, ONE, Q2( IQ2 ), N2, S, N23,
246 $ ZERO, Q( N1+1, 1 ), LDQ )
247 ELSE
248 CALL DLASET( 'A', N2, K, ZERO, ZERO, Q( N1+1, 1 ), LDQ )
249 END IF
250 *
251 CALL DLACPY( 'A', N12, K, Q, LDQ, S, N12 )
252 IF( N12.NE.0 ) THEN
253 CALL DGEMM( 'N', 'N', N1, K, N12, ONE, Q2, N1, S, N12, ZERO, Q,
254 $ LDQ )
255 ELSE
256 CALL DLASET( 'A', N1, K, ZERO, ZERO, Q( 1, 1 ), LDQ )
257 END IF
258 *
259 *
260 120 CONTINUE
261 RETURN
262 *
263 * End of DLAED3
264 *
265 END