1       SUBROUTINE DLAED2( K, N, N1, D, Q, LDQ, INDXQ, RHO, Z, DLAMDA, W,
  2      $                   Q2, INDX, INDXC, INDXP, COLTYP, INFO )
  3 *
  4 *  -- LAPACK routine (version 3.2) --
  5 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  6 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  7 *     November 2006
  8 *
  9 *     .. Scalar Arguments ..
 10       INTEGER            INFO, K, LDQ, N, N1
 11       DOUBLE PRECISION   RHO
 12 *     ..
 13 *     .. Array Arguments ..
 14       INTEGER            COLTYP( * ), INDX( * ), INDXC( * ), INDXP( * ),
 15      $                   INDXQ( * )
 16       DOUBLE PRECISION   D( * ), DLAMDA( * ), Q( LDQ, * ), Q2( * ),
 17      $                   W( * ), Z( * )
 18 *     ..
 19 *
 20 *  Purpose
 21 *  =======
 22 *
 23 *  DLAED2 merges the two sets of eigenvalues together into a single
 24 *  sorted set.  Then it tries to deflate the size of the problem.
 25 *  There are two ways in which deflation can occur:  when two or more
 26 *  eigenvalues are close together or if there is a tiny entry in the
 27 *  Z vector.  For each such occurrence the order of the related secular
 28 *  equation problem is reduced by one.
 29 *
 30 *  Arguments
 31 *  =========
 32 *
 33 *  K      (output) INTEGER
 34 *         The number of non-deflated eigenvalues, and the order of the
 35 *         related secular equation. 0 <= K <=N.
 36 *
 37 *  N      (input) INTEGER
 38 *         The dimension of the symmetric tridiagonal matrix.  N >= 0.
 39 *
 40 *  N1     (input) INTEGER
 41 *         The location of the last eigenvalue in the leading sub-matrix.
 42 *         min(1,N) <= N1 <= N/2.
 43 *
 44 *  D      (input/output) DOUBLE PRECISION array, dimension (N)
 45 *         On entry, D contains the eigenvalues of the two submatrices to
 46 *         be combined.
 47 *         On exit, D contains the trailing (N-K) updated eigenvalues
 48 *         (those which were deflated) sorted into increasing order.
 49 *
 50 *  Q      (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
 51 *         On entry, Q contains the eigenvectors of two submatrices in
 52 *         the two square blocks with corners at (1,1), (N1,N1)
 53 *         and (N1+1, N1+1), (N,N).
 54 *         On exit, Q contains the trailing (N-K) updated eigenvectors
 55 *         (those which were deflated) in its last N-K columns.
 56 *
 57 *  LDQ    (input) INTEGER
 58 *         The leading dimension of the array Q.  LDQ >= max(1,N).
 59 *
 60 *  INDXQ  (input/output) INTEGER array, dimension (N)
 61 *         The permutation which separately sorts the two sub-problems
 62 *         in D into ascending order.  Note that elements in the second
 63 *         half of this permutation must first have N1 added to their
 64 *         values. Destroyed on exit.
 65 *
 66 *  RHO    (input/output) DOUBLE PRECISION
 67 *         On entry, the off-diagonal element associated with the rank-1
 68 *         cut which originally split the two submatrices which are now
 69 *         being recombined.
 70 *         On exit, RHO has been modified to the value required by
 71 *         DLAED3.
 72 *
 73 *  Z      (input) DOUBLE PRECISION array, dimension (N)
 74 *         On entry, Z contains the updating vector (the last
 75 *         row of the first sub-eigenvector matrix and the first row of
 76 *         the second sub-eigenvector matrix).
 77 *         On exit, the contents of Z have been destroyed by the updating
 78 *         process.
 79 *
 80 *  DLAMDA (output) DOUBLE PRECISION array, dimension (N)
 81 *         A copy of the first K eigenvalues which will be used by
 82 *         DLAED3 to form the secular equation.
 83 *
 84 *  W      (output) DOUBLE PRECISION array, dimension (N)
 85 *         The first k values of the final deflation-altered z-vector
 86 *         which will be passed to DLAED3.
 87 *
 88 *  Q2     (output) DOUBLE PRECISION array, dimension (N1**2+(N-N1)**2)
 89 *         A copy of the first K eigenvectors which will be used by
 90 *         DLAED3 in a matrix multiply (DGEMM) to solve for the new
 91 *         eigenvectors.
 92 *
 93 *  INDX   (workspace) INTEGER array, dimension (N)
 94 *         The permutation used to sort the contents of DLAMDA into
 95 *         ascending order.
 96 *
 97 *  INDXC  (output) INTEGER array, dimension (N)
 98 *         The permutation used to arrange the columns of the deflated
 99 *         Q matrix into three groups:  the first group contains non-zero
100 *         elements only at and above N1, the second contains
101 *         non-zero elements only below N1, and the third is dense.
102 *
103 *  INDXP  (workspace) INTEGER array, dimension (N)
104 *         The permutation used to place deflated values of D at the end
105 *         of the array.  INDXP(1:K) points to the nondeflated D-values
106 *         and INDXP(K+1:N) points to the deflated eigenvalues.
107 *
108 *  COLTYP (workspace/output) INTEGER array, dimension (N)
109 *         During execution, a label which will indicate which of the
110 *         following types a column in the Q2 matrix is:
111 *         1 : non-zero in the upper half only;
112 *         2 : dense;
113 *         3 : non-zero in the lower half only;
114 *         4 : deflated.
115 *         On exit, COLTYP(i) is the number of columns of type i,
116 *         for i=1 to 4 only.
117 *
118 *  INFO   (output) INTEGER
119 *          = 0:  successful exit.
120 *          < 0:  if INFO = -i, the i-th argument had an illegal value.
121 *
122 *  Further Details
123 *  ===============
124 *
125 *  Based on contributions by
126 *     Jeff Rutter, Computer Science Division, University of California
127 *     at Berkeley, USA
128 *  Modified by Francoise Tisseur, University of Tennessee.
129 *
130 *  =====================================================================
131 *
132 *     .. Parameters ..
133       DOUBLE PRECISION   MONE, ZERO, ONE, TWO, EIGHT
134       PARAMETER          ( MONE = -1.0D0, ZERO = 0.0D0, ONE = 1.0D0,
135      $                   TWO = 2.0D0, EIGHT = 8.0D0 )
136 *     ..
137 *     .. Local Arrays ..
138       INTEGER            CTOT( 4 ), PSM( 4 )
139 *     ..
140 *     .. Local Scalars ..
141       INTEGER            CT, I, IMAX, IQ1, IQ2, J, JMAX, JS, K2, N1P1,
142      $                   N2, NJ, PJ
143       DOUBLE PRECISION   C, EPS, S, T, TAU, TOL
144 *     ..
145 *     .. External Functions ..
146       INTEGER            IDAMAX
147       DOUBLE PRECISION   DLAMCH, DLAPY2
148       EXTERNAL           IDAMAX, DLAMCH, DLAPY2
149 *     ..
150 *     .. External Subroutines ..
151       EXTERNAL           DCOPY, DLACPY, DLAMRG, DROT, DSCAL, XERBLA
152 *     ..
153 *     .. Intrinsic Functions ..
154       INTRINSIC          ABSMAXMINSQRT
155 *     ..
156 *     .. Executable Statements ..
157 *
158 *     Test the input parameters.
159 *
160       INFO = 0
161 *
162       IF( N.LT.0 ) THEN
163          INFO = -2
164       ELSE IF( LDQ.LT.MAX1, N ) ) THEN
165          INFO = -6
166       ELSE IFMIN1, ( N / 2 ) ).GT.N1 .OR. ( N / 2 ).LT.N1 ) THEN
167          INFO = -3
168       END IF
169       IF( INFO.NE.0 ) THEN
170          CALL XERBLA( 'DLAED2'-INFO )
171          RETURN
172       END IF
173 *
174 *     Quick return if possible
175 *
176       IF( N.EQ.0 )
177      $   RETURN
178 *
179       N2 = N - N1
180       N1P1 = N1 + 1
181 *
182       IF( RHO.LT.ZERO ) THEN
183          CALL DSCAL( N2, MONE, Z( N1P1 ), 1 )
184       END IF
185 *
186 *     Normalize z so that norm(z) = 1.  Since z is the concatenation of
187 *     two normalized vectors, norm2(z) = sqrt(2).
188 *
189       T = ONE / SQRT( TWO )
190       CALL DSCAL( N, T, Z, 1 )
191 *
192 *     RHO = ABS( norm(z)**2 * RHO )
193 *
194       RHO = ABS( TWO*RHO )
195 *
196 *     Sort the eigenvalues into increasing order
197 *
198       DO 10 I = N1P1, N
199          INDXQ( I ) = INDXQ( I ) + N1
200    10 CONTINUE
201 *
202 *     re-integrate the deflated parts from the last pass
203 *
204       DO 20 I = 1, N
205          DLAMDA( I ) = D( INDXQ( I ) )
206    20 CONTINUE
207       CALL DLAMRG( N1, N2, DLAMDA, 11, INDXC )
208       DO 30 I = 1, N
209          INDX( I ) = INDXQ( INDXC( I ) )
210    30 CONTINUE
211 *
212 *     Calculate the allowable deflation tolerance
213 *
214       IMAX = IDAMAX( N, Z, 1 )
215       JMAX = IDAMAX( N, D, 1 )
216       EPS = DLAMCH( 'Epsilon' )
217       TOL = EIGHT*EPS*MAXABS( D( JMAX ) ), ABS( Z( IMAX ) ) )
218 *
219 *     If the rank-1 modifier is small enough, no more needs to be done
220 *     except to reorganize Q so that its columns correspond with the
221 *     elements in D.
222 *
223       IF( RHO*ABS( Z( IMAX ) ).LE.TOL ) THEN
224          K = 0
225          IQ2 = 1
226          DO 40 J = 1, N
227             I = INDX( J )
228             CALL DCOPY( N, Q( 1, I ), 1, Q2( IQ2 ), 1 )
229             DLAMDA( J ) = D( I )
230             IQ2 = IQ2 + N
231    40    CONTINUE
232          CALL DLACPY( 'A', N, N, Q2, N, Q, LDQ )
233          CALL DCOPY( N, DLAMDA, 1, D, 1 )
234          GO TO 190
235       END IF
236 *
237 *     If there are multiple eigenvalues then the problem deflates.  Here
238 *     the number of equal eigenvalues are found.  As each equal
239 *     eigenvalue is found, an elementary reflector is computed to rotate
240 *     the corresponding eigensubspace so that the corresponding
241 *     components of Z are zero in this new basis.
242 *
243       DO 50 I = 1, N1
244          COLTYP( I ) = 1
245    50 CONTINUE
246       DO 60 I = N1P1, N
247          COLTYP( I ) = 3
248    60 CONTINUE
249 *
250 *
251       K = 0
252       K2 = N + 1
253       DO 70 J = 1, N
254          NJ = INDX( J )
255          IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
256 *
257 *           Deflate due to small z component.
258 *
259             K2 = K2 - 1
260             COLTYP( NJ ) = 4
261             INDXP( K2 ) = NJ
262             IF( J.EQ.N )
263      $         GO TO 100
264          ELSE
265             PJ = NJ
266             GO TO 80
267          END IF
268    70 CONTINUE
269    80 CONTINUE
270       J = J + 1
271       NJ = INDX( J )
272       IF( J.GT.N )
273      $   GO TO 100
274       IF( RHO*ABS( Z( NJ ) ).LE.TOL ) THEN
275 *
276 *        Deflate due to small z component.
277 *
278          K2 = K2 - 1
279          COLTYP( NJ ) = 4
280          INDXP( K2 ) = NJ
281       ELSE
282 *
283 *        Check if eigenvalues are close enough to allow deflation.
284 *
285          S = Z( PJ )
286          C = Z( NJ )
287 *
288 *        Find sqrt(a**2+b**2) without overflow or
289 *        destructive underflow.
290 *
291          TAU = DLAPY2( C, S )
292          T = D( NJ ) - D( PJ )
293          C = C / TAU
294          S = -/ TAU
295          IFABS( T*C*S ).LE.TOL ) THEN
296 *
297 *           Deflation is possible.
298 *
299             Z( NJ ) = TAU
300             Z( PJ ) = ZERO
301             IF( COLTYP( NJ ).NE.COLTYP( PJ ) )
302      $         COLTYP( NJ ) = 2
303             COLTYP( PJ ) = 4
304             CALL DROT( N, Q( 1, PJ ), 1, Q( 1, NJ ), 1, C, S )
305             T = D( PJ )*C**2 + D( NJ )*S**2
306             D( NJ ) = D( PJ )*S**2 + D( NJ )*C**2
307             D( PJ ) = T
308             K2 = K2 - 1
309             I = 1
310    90       CONTINUE
311             IF( K2+I.LE.N ) THEN
312                IF( D( PJ ).LT.D( INDXP( K2+I ) ) ) THEN
313                   INDXP( K2+I-1 ) = INDXP( K2+I )
314                   INDXP( K2+I ) = PJ
315                   I = I + 1
316                   GO TO 90
317                ELSE
318                   INDXP( K2+I-1 ) = PJ
319                END IF
320             ELSE
321                INDXP( K2+I-1 ) = PJ
322             END IF
323             PJ = NJ
324          ELSE
325             K = K + 1
326             DLAMDA( K ) = D( PJ )
327             W( K ) = Z( PJ )
328             INDXP( K ) = PJ
329             PJ = NJ
330          END IF
331       END IF
332       GO TO 80
333   100 CONTINUE
334 *
335 *     Record the last eigenvalue.
336 *
337       K = K + 1
338       DLAMDA( K ) = D( PJ )
339       W( K ) = Z( PJ )
340       INDXP( K ) = PJ
341 *
342 *     Count up the total number of the various types of columns, then
343 *     form a permutation which positions the four column types into
344 *     four uniform groups (although one or more of these groups may be
345 *     empty).
346 *
347       DO 110 J = 14
348          CTOT( J ) = 0
349   110 CONTINUE
350       DO 120 J = 1, N
351          CT = COLTYP( J )
352          CTOT( CT ) = CTOT( CT ) + 1
353   120 CONTINUE
354 *
355 *     PSM(*) = Position in SubMatrix (of types 1 through 4)
356 *
357       PSM( 1 ) = 1
358       PSM( 2 ) = 1 + CTOT( 1 )
359       PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
360       PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
361       K = N - CTOT( 4 )
362 *
363 *     Fill out the INDXC array so that the permutation which it induces
364 *     will place all type-1 columns first, all type-2 columns next,
365 *     then all type-3's, and finally all type-4's.
366 *
367       DO 130 J = 1, N
368          JS = INDXP( J )
369          CT = COLTYP( JS )
370          INDX( PSM( CT ) ) = JS
371          INDXC( PSM( CT ) ) = J
372          PSM( CT ) = PSM( CT ) + 1
373   130 CONTINUE
374 *
375 *     Sort the eigenvalues and corresponding eigenvectors into DLAMDA
376 *     and Q2 respectively.  The eigenvalues/vectors which were not
377 *     deflated go into the first K slots of DLAMDA and Q2 respectively,
378 *     while those which were deflated go into the last N - K slots.
379 *
380       I = 1
381       IQ1 = 1
382       IQ2 = 1 + ( CTOT( 1 )+CTOT( 2 ) )*N1
383       DO 140 J = 1, CTOT( 1 )
384          JS = INDX( I )
385          CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
386          Z( I ) = D( JS )
387          I = I + 1
388          IQ1 = IQ1 + N1
389   140 CONTINUE
390 *
391       DO 150 J = 1, CTOT( 2 )
392          JS = INDX( I )
393          CALL DCOPY( N1, Q( 1, JS ), 1, Q2( IQ1 ), 1 )
394          CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
395          Z( I ) = D( JS )
396          I = I + 1
397          IQ1 = IQ1 + N1
398          IQ2 = IQ2 + N2
399   150 CONTINUE
400 *
401       DO 160 J = 1, CTOT( 3 )
402          JS = INDX( I )
403          CALL DCOPY( N2, Q( N1+1, JS ), 1, Q2( IQ2 ), 1 )
404          Z( I ) = D( JS )
405          I = I + 1
406          IQ2 = IQ2 + N2
407   160 CONTINUE
408 *
409       IQ1 = IQ2
410       DO 170 J = 1, CTOT( 4 )
411          JS = INDX( I )
412          CALL DCOPY( N, Q( 1, JS ), 1, Q2( IQ2 ), 1 )
413          IQ2 = IQ2 + N
414          Z( I ) = D( JS )
415          I = I + 1
416   170 CONTINUE
417 *
418 *     The deflated eigenvalues and their corresponding vectors go back
419 *     into the last N - K slots of D and Q respectively.
420 *
421       CALL DLACPY( 'A', N, CTOT( 4 ), Q2( IQ1 ), N, Q( 1, K+1 ), LDQ )
422       CALL DCOPY( N-K, Z( K+1 ), 1, D( K+1 ), 1 )
423 *
424 *     Copy CTOT into COLTYP for referencing in DLAED3.
425 *
426       DO 180 J = 14
427          COLTYP( J ) = CTOT( J )
428   180 CONTINUE
429 *
430   190 CONTINUE
431       RETURN
432 *
433 *     End of DLAED2
434 *
435       END