1       SUBROUTINE DLASD7( ICOMPQ, NL, NR, SQRE, K, D, Z, ZW, VF, VFW, VL,
  2      $                   VLW, ALPHA, BETA, DSIGMA, IDX, IDXP, IDXQ,
  3      $                   PERM, GIVPTR, GIVCOL, LDGCOL, GIVNUM, LDGNUM,
  4      $                   C, S, INFO )
  5 *
  6 *  -- LAPACK auxiliary routine (version 3.2) --
  7 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  8 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  9 *     November 2006
 10 *
 11 *     .. Scalar Arguments ..
 12       INTEGER            GIVPTR, ICOMPQ, INFO, K, LDGCOL, LDGNUM, NL,
 13      $                   NR, SQRE
 14       DOUBLE PRECISION   ALPHA, BETA, C, S
 15 *     ..
 16 *     .. Array Arguments ..
 17       INTEGER            GIVCOL( LDGCOL, * ), IDX( * ), IDXP( * ),
 18      $                   IDXQ( * ), PERM( * )
 19       DOUBLE PRECISION   D( * ), DSIGMA( * ), GIVNUM( LDGNUM, * ),
 20      $                   VF( * ), VFW( * ), VL( * ), VLW( * ), Z( * ),
 21      $                   ZW( * )
 22 *     ..
 23 *
 24 *  Purpose
 25 *  =======
 26 *
 27 *  DLASD7 merges the two sets of singular values together into a single
 28 *  sorted set. Then it tries to deflate the size of the problem. There
 29 *  are two ways in which deflation can occur:  when two or more singular
 30 *  values are close together or if there is a tiny entry in the Z
 31 *  vector. For each such occurrence the order of the related
 32 *  secular equation problem is reduced by one.
 33 *
 34 *  DLASD7 is called from DLASD6.
 35 *
 36 *  Arguments
 37 *  =========
 38 *
 39 *  ICOMPQ  (input) INTEGER
 40 *          Specifies whether singular vectors are to be computed
 41 *          in compact form, as follows:
 42 *          = 0: Compute singular values only.
 43 *          = 1: Compute singular vectors of upper
 44 *               bidiagonal matrix in compact form.
 45 *
 46 *  NL     (input) INTEGER
 47 *         The row dimension of the upper block. NL >= 1.
 48 *
 49 *  NR     (input) INTEGER
 50 *         The row dimension of the lower block. NR >= 1.
 51 *
 52 *  SQRE   (input) INTEGER
 53 *         = 0: the lower block is an NR-by-NR square matrix.
 54 *         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
 55 *
 56 *         The bidiagonal matrix has
 57 *         N = NL + NR + 1 rows and
 58 *         M = N + SQRE >= N columns.
 59 *
 60 *  K      (output) INTEGER
 61 *         Contains the dimension of the non-deflated matrix, this is
 62 *         the order of the related secular equation. 1 <= K <=N.
 63 *
 64 *  D      (input/output) DOUBLE PRECISION array, dimension ( N )
 65 *         On entry D contains the singular values of the two submatrices
 66 *         to be combined. On exit D contains the trailing (N-K) updated
 67 *         singular values (those which were deflated) sorted into
 68 *         increasing order.
 69 *
 70 *  Z      (output) DOUBLE PRECISION array, dimension ( M )
 71 *         On exit Z contains the updating row vector in the secular
 72 *         equation.
 73 *
 74 *  ZW     (workspace) DOUBLE PRECISION array, dimension ( M )
 75 *         Workspace for Z.
 76 *
 77 *  VF     (input/output) DOUBLE PRECISION array, dimension ( M )
 78 *         On entry, VF(1:NL+1) contains the first components of all
 79 *         right singular vectors of the upper block; and VF(NL+2:M)
 80 *         contains the first components of all right singular vectors
 81 *         of the lower block. On exit, VF contains the first components
 82 *         of all right singular vectors of the bidiagonal matrix.
 83 *
 84 *  VFW    (workspace) DOUBLE PRECISION array, dimension ( M )
 85 *         Workspace for VF.
 86 *
 87 *  VL     (input/output) DOUBLE PRECISION array, dimension ( M )
 88 *         On entry, VL(1:NL+1) contains the  last components of all
 89 *         right singular vectors of the upper block; and VL(NL+2:M)
 90 *         contains the last components of all right singular vectors
 91 *         of the lower block. On exit, VL contains the last components
 92 *         of all right singular vectors of the bidiagonal matrix.
 93 *
 94 *  VLW    (workspace) DOUBLE PRECISION array, dimension ( M )
 95 *         Workspace for VL.
 96 *
 97 *  ALPHA  (input) DOUBLE PRECISION
 98 *         Contains the diagonal element associated with the added row.
 99 *
100 *  BETA   (input) DOUBLE PRECISION
101 *         Contains the off-diagonal element associated with the added
102 *         row.
103 *
104 *  DSIGMA (output) DOUBLE PRECISION array, dimension ( N )
105 *         Contains a copy of the diagonal elements (K-1 singular values
106 *         and one zero) in the secular equation.
107 *
108 *  IDX    (workspace) INTEGER array, dimension ( N )
109 *         This will contain the permutation used to sort the contents of
110 *         D into ascending order.
111 *
112 *  IDXP   (workspace) INTEGER array, dimension ( N )
113 *         This will contain the permutation used to place deflated
114 *         values of D at the end of the array. On output IDXP(2:K)
115 *         points to the nondeflated D-values and IDXP(K+1:N)
116 *         points to the deflated singular values.
117 *
118 *  IDXQ   (input) INTEGER array, dimension ( N )
119 *         This contains the permutation which separately sorts the two
120 *         sub-problems in D into ascending order.  Note that entries in
121 *         the first half of this permutation must first be moved one
122 *         position backward; and entries in the second half
123 *         must first have NL+1 added to their values.
124 *
125 *  PERM   (output) INTEGER array, dimension ( N )
126 *         The permutations (from deflation and sorting) to be applied
127 *         to each singular block. Not referenced if ICOMPQ = 0.
128 *
129 *  GIVPTR (output) INTEGER
130 *         The number of Givens rotations which took place in this
131 *         subproblem. Not referenced if ICOMPQ = 0.
132 *
133 *  GIVCOL (output) INTEGER array, dimension ( LDGCOL, 2 )
134 *         Each pair of numbers indicates a pair of columns to take place
135 *         in a Givens rotation. Not referenced if ICOMPQ = 0.
136 *
137 *  LDGCOL (input) INTEGER
138 *         The leading dimension of GIVCOL, must be at least N.
139 *
140 *  GIVNUM (output) DOUBLE PRECISION array, dimension ( LDGNUM, 2 )
141 *         Each number indicates the C or S value to be used in the
142 *         corresponding Givens rotation. Not referenced if ICOMPQ = 0.
143 *
144 *  LDGNUM (input) INTEGER
145 *         The leading dimension of GIVNUM, must be at least N.
146 *
147 *  C      (output) DOUBLE PRECISION
148 *         C contains garbage if SQRE =0 and the C-value of a Givens
149 *         rotation related to the right null space if SQRE = 1.
150 *
151 *  S      (output) DOUBLE PRECISION
152 *         S contains garbage if SQRE =0 and the S-value of a Givens
153 *         rotation related to the right null space if SQRE = 1.
154 *
155 *  INFO   (output) INTEGER
156 *         = 0:  successful exit.
157 *         < 0:  if INFO = -i, the i-th argument had an illegal value.
158 *
159 *  Further Details
160 *  ===============
161 *
162 *  Based on contributions by
163 *     Ming Gu and Huan Ren, Computer Science Division, University of
164 *     California at Berkeley, USA
165 *
166 *  =====================================================================
167 *
168 *     .. Parameters ..
169       DOUBLE PRECISION   ZERO, ONE, TWO, EIGHT
170       PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
171      $                   EIGHT = 8.0D+0 )
172 *     ..
173 *     .. Local Scalars ..
174 *
175       INTEGER            I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M, N,
176      $                   NLP1, NLP2
177       DOUBLE PRECISION   EPS, HLFTOL, TAU, TOL, Z1
178 *     ..
179 *     .. External Subroutines ..
180       EXTERNAL           DCOPY, DLAMRG, DROT, XERBLA
181 *     ..
182 *     .. External Functions ..
183       DOUBLE PRECISION   DLAMCH, DLAPY2
184       EXTERNAL           DLAMCH, DLAPY2
185 *     ..
186 *     .. Intrinsic Functions ..
187       INTRINSIC          ABSMAX
188 *     ..
189 *     .. Executable Statements ..
190 *
191 *     Test the input parameters.
192 *
193       INFO = 0
194       N = NL + NR + 1
195       M = N + SQRE
196 *
197       IF( ( ICOMPQ.LT.0 ) .OR. ( ICOMPQ.GT.1 ) ) THEN
198          INFO = -1
199       ELSE IF( NL.LT.1 ) THEN
200          INFO = -2
201       ELSE IF( NR.LT.1 ) THEN
202          INFO = -3
203       ELSE IF( ( SQRE.LT.0 ) .OR. ( SQRE.GT.1 ) ) THEN
204          INFO = -4
205       ELSE IF( LDGCOL.LT.N ) THEN
206          INFO = -22
207       ELSE IF( LDGNUM.LT.N ) THEN
208          INFO = -24
209       END IF
210       IF( INFO.NE.0 ) THEN
211          CALL XERBLA( 'DLASD7'-INFO )
212          RETURN
213       END IF
214 *
215       NLP1 = NL + 1
216       NLP2 = NL + 2
217       IF( ICOMPQ.EQ.1 ) THEN
218          GIVPTR = 0
219       END IF
220 *
221 *     Generate the first part of the vector Z and move the singular
222 *     values in the first part of D one position backward.
223 *
224       Z1 = ALPHA*VL( NLP1 )
225       VL( NLP1 ) = ZERO
226       TAU = VF( NLP1 )
227       DO 10 I = NL, 1-1
228          Z( I+1 ) = ALPHA*VL( I )
229          VL( I ) = ZERO
230          VF( I+1 ) = VF( I )
231          D( I+1 ) = D( I )
232          IDXQ( I+1 ) = IDXQ( I ) + 1
233    10 CONTINUE
234       VF( 1 ) = TAU
235 *
236 *     Generate the second part of the vector Z.
237 *
238       DO 20 I = NLP2, M
239          Z( I ) = BETA*VF( I )
240          VF( I ) = ZERO
241    20 CONTINUE
242 *
243 *     Sort the singular values into increasing order
244 *
245       DO 30 I = NLP2, N
246          IDXQ( I ) = IDXQ( I ) + NLP1
247    30 CONTINUE
248 *
249 *     DSIGMA, IDXC, IDXC, and ZW are used as storage space.
250 *
251       DO 40 I = 2, N
252          DSIGMA( I ) = D( IDXQ( I ) )
253          ZW( I ) = Z( IDXQ( I ) )
254          VFW( I ) = VF( IDXQ( I ) )
255          VLW( I ) = VL( IDXQ( I ) )
256    40 CONTINUE
257 *
258       CALL DLAMRG( NL, NR, DSIGMA( 2 ), 11, IDX( 2 ) )
259 *
260       DO 50 I = 2, N
261          IDXI = 1 + IDX( I )
262          D( I ) = DSIGMA( IDXI )
263          Z( I ) = ZW( IDXI )
264          VF( I ) = VFW( IDXI )
265          VL( I ) = VLW( IDXI )
266    50 CONTINUE
267 *
268 *     Calculate the allowable deflation tolerence
269 *
270       EPS = DLAMCH( 'Epsilon' )
271       TOL = MAXABS( ALPHA ), ABS( BETA ) )
272       TOL = EIGHT*EIGHT*EPS*MAXABS( D( N ) ), TOL )
273 *
274 *     There are 2 kinds of deflation -- first a value in the z-vector
275 *     is small, second two (or more) singular values are very close
276 *     together (their difference is small).
277 *
278 *     If the value in the z-vector is small, we simply permute the
279 *     array so that the corresponding singular value is moved to the
280 *     end.
281 *
282 *     If two values in the D-vector are close, we perform a two-sided
283 *     rotation designed to make one of the corresponding z-vector
284 *     entries zero, and then permute the array so that the deflated
285 *     singular value is moved to the end.
286 *
287 *     If there are multiple singular values then the problem deflates.
288 *     Here the number of equal singular values are found.  As each equal
289 *     singular value is found, an elementary reflector is computed to
290 *     rotate the corresponding singular subspace so that the
291 *     corresponding components of Z are zero in this new basis.
292 *
293       K = 1
294       K2 = N + 1
295       DO 60 J = 2, N
296          IFABS( Z( J ) ).LE.TOL ) THEN
297 *
298 *           Deflate due to small z component.
299 *
300             K2 = K2 - 1
301             IDXP( K2 ) = J
302             IF( J.EQ.N )
303      $         GO TO 100
304          ELSE
305             JPREV = J
306             GO TO 70
307          END IF
308    60 CONTINUE
309    70 CONTINUE
310       J = JPREV
311    80 CONTINUE
312       J = J + 1
313       IF( J.GT.N )
314      $   GO TO 90
315       IFABS( Z( J ) ).LE.TOL ) THEN
316 *
317 *        Deflate due to small z component.
318 *
319          K2 = K2 - 1
320          IDXP( K2 ) = J
321       ELSE
322 *
323 *        Check if singular values are close enough to allow deflation.
324 *
325          IFABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
326 *
327 *           Deflation is possible.
328 *
329             S = Z( JPREV )
330             C = Z( J )
331 *
332 *           Find sqrt(a**2+b**2) without overflow or
333 *           destructive underflow.
334 *
335             TAU = DLAPY2( C, S )
336             Z( J ) = TAU
337             Z( JPREV ) = ZERO
338             C = C / TAU
339             S = -/ TAU
340 *
341 *           Record the appropriate Givens rotation
342 *
343             IF( ICOMPQ.EQ.1 ) THEN
344                GIVPTR = GIVPTR + 1
345                IDXJP = IDXQ( IDX( JPREV )+1 )
346                IDXJ = IDXQ( IDX( J )+1 )
347                IF( IDXJP.LE.NLP1 ) THEN
348                   IDXJP = IDXJP - 1
349                END IF
350                IF( IDXJ.LE.NLP1 ) THEN
351                   IDXJ = IDXJ - 1
352                END IF
353                GIVCOL( GIVPTR, 2 ) = IDXJP
354                GIVCOL( GIVPTR, 1 ) = IDXJ
355                GIVNUM( GIVPTR, 2 ) = C
356                GIVNUM( GIVPTR, 1 ) = S
357             END IF
358             CALL DROT( 1, VF( JPREV ), 1, VF( J ), 1, C, S )
359             CALL DROT( 1, VL( JPREV ), 1, VL( J ), 1, C, S )
360             K2 = K2 - 1
361             IDXP( K2 ) = JPREV
362             JPREV = J
363          ELSE
364             K = K + 1
365             ZW( K ) = Z( JPREV )
366             DSIGMA( K ) = D( JPREV )
367             IDXP( K ) = JPREV
368             JPREV = J
369          END IF
370       END IF
371       GO TO 80
372    90 CONTINUE
373 *
374 *     Record the last singular value.
375 *
376       K = K + 1
377       ZW( K ) = Z( JPREV )
378       DSIGMA( K ) = D( JPREV )
379       IDXP( K ) = JPREV
380 *
381   100 CONTINUE
382 *
383 *     Sort the singular values into DSIGMA. The singular values which
384 *     were not deflated go into the first K slots of DSIGMA, except
385 *     that DSIGMA(1) is treated separately.
386 *
387       DO 110 J = 2, N
388          JP = IDXP( J )
389          DSIGMA( J ) = D( JP )
390          VFW( J ) = VF( JP )
391          VLW( J ) = VL( JP )
392   110 CONTINUE
393       IF( ICOMPQ.EQ.1 ) THEN
394          DO 120 J = 2, N
395             JP = IDXP( J )
396             PERM( J ) = IDXQ( IDX( JP )+1 )
397             IF( PERM( J ).LE.NLP1 ) THEN
398                PERM( J ) = PERM( J ) - 1
399             END IF
400   120    CONTINUE
401       END IF
402 *
403 *     The deflated singular values go back into the last N - K slots of
404 *     D.
405 *
406       CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
407 *
408 *     Determine DSIGMA(1), DSIGMA(2), Z(1), VF(1), VL(1), VF(M), and
409 *     VL(M).
410 *
411       DSIGMA( 1 ) = ZERO
412       HLFTOL = TOL / TWO
413       IFABS( DSIGMA( 2 ) ).LE.HLFTOL )
414      $   DSIGMA( 2 ) = HLFTOL
415       IF( M.GT.N ) THEN
416          Z( 1 ) = DLAPY2( Z1, Z( M ) )
417          IF( Z( 1 ).LE.TOL ) THEN
418             C = ONE
419             S = ZERO
420             Z( 1 ) = TOL
421          ELSE
422             C = Z1 / Z( 1 )
423             S = -Z( M ) / Z( 1 )
424          END IF
425          CALL DROT( 1, VF( M ), 1, VF( 1 ), 1, C, S )
426          CALL DROT( 1, VL( M ), 1, VL( 1 ), 1, C, S )
427       ELSE
428          IFABS( Z1 ).LE.TOL ) THEN
429             Z( 1 ) = TOL
430          ELSE
431             Z( 1 ) = Z1
432          END IF
433       END IF
434 *
435 *     Restore Z, VF, and VL.
436 *
437       CALL DCOPY( K-1, ZW( 2 ), 1, Z( 2 ), 1 )
438       CALL DCOPY( N-1, VFW( 2 ), 1, VF( 2 ), 1 )
439       CALL DCOPY( N-1, VLW( 2 ), 1, VL( 2 ), 1 )
440 *
441       RETURN
442 *
443 *     End of DLASD7
444 *
445       END