1 SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
2 $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
3 $ IDXC, IDXQ, COLTYP, INFO )
4 *
5 * -- LAPACK auxiliary routine (version 3.2) --
6 * -- LAPACK is a software package provided by Univ. of Tennessee, --
7 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
8 * November 2006
9 *
10 * .. Scalar Arguments ..
11 INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
12 DOUBLE PRECISION ALPHA, BETA
13 * ..
14 * .. Array Arguments ..
15 INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
16 $ IDXQ( * )
17 DOUBLE PRECISION D( * ), DSIGMA( * ), U( LDU, * ),
18 $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
19 $ Z( * )
20 * ..
21 *
22 * Purpose
23 * =======
24 *
25 * DLASD2 merges the two sets of singular values together into a single
26 * sorted set. Then it tries to deflate the size of the problem.
27 * There are two ways in which deflation can occur: when two or more
28 * singular values are close together or if there is a tiny entry in the
29 * Z vector. For each such occurrence the order of the related secular
30 * equation problem is reduced by one.
31 *
32 * DLASD2 is called from DLASD1.
33 *
34 * Arguments
35 * =========
36 *
37 * NL (input) INTEGER
38 * The row dimension of the upper block. NL >= 1.
39 *
40 * NR (input) INTEGER
41 * The row dimension of the lower block. NR >= 1.
42 *
43 * SQRE (input) INTEGER
44 * = 0: the lower block is an NR-by-NR square matrix.
45 * = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
46 *
47 * The bidiagonal matrix has N = NL + NR + 1 rows and
48 * M = N + SQRE >= N columns.
49 *
50 * K (output) INTEGER
51 * Contains the dimension of the non-deflated matrix,
52 * This is the order of the related secular equation. 1 <= K <=N.
53 *
54 * D (input/output) DOUBLE PRECISION array, dimension(N)
55 * On entry D contains the singular values of the two submatrices
56 * to be combined. On exit D contains the trailing (N-K) updated
57 * singular values (those which were deflated) sorted into
58 * increasing order.
59 *
60 * Z (output) DOUBLE PRECISION array, dimension(N)
61 * On exit Z contains the updating row vector in the secular
62 * equation.
63 *
64 * ALPHA (input) DOUBLE PRECISION
65 * Contains the diagonal element associated with the added row.
66 *
67 * BETA (input) DOUBLE PRECISION
68 * Contains the off-diagonal element associated with the added
69 * row.
70 *
71 * U (input/output) DOUBLE PRECISION array, dimension(LDU,N)
72 * On entry U contains the left singular vectors of two
73 * submatrices in the two square blocks with corners at (1,1),
74 * (NL, NL), and (NL+2, NL+2), (N,N).
75 * On exit U contains the trailing (N-K) updated left singular
76 * vectors (those which were deflated) in its last N-K columns.
77 *
78 * LDU (input) INTEGER
79 * The leading dimension of the array U. LDU >= N.
80 *
81 * VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M)
82 * On entry VT**T contains the right singular vectors of two
83 * submatrices in the two square blocks with corners at (1,1),
84 * (NL+1, NL+1), and (NL+2, NL+2), (M,M).
85 * On exit VT**T contains the trailing (N-K) updated right singular
86 * vectors (those which were deflated) in its last N-K columns.
87 * In case SQRE =1, the last row of VT spans the right null
88 * space.
89 *
90 * LDVT (input) INTEGER
91 * The leading dimension of the array VT. LDVT >= M.
92 *
93 * DSIGMA (output) DOUBLE PRECISION array, dimension (N)
94 * Contains a copy of the diagonal elements (K-1 singular values
95 * and one zero) in the secular equation.
96 *
97 * U2 (output) DOUBLE PRECISION array, dimension(LDU2,N)
98 * Contains a copy of the first K-1 left singular vectors which
99 * will be used by DLASD3 in a matrix multiply (DGEMM) to solve
100 * for the new left singular vectors. U2 is arranged into four
101 * blocks. The first block contains a column with 1 at NL+1 and
102 * zero everywhere else; the second block contains non-zero
103 * entries only at and above NL; the third contains non-zero
104 * entries only below NL+1; and the fourth is dense.
105 *
106 * LDU2 (input) INTEGER
107 * The leading dimension of the array U2. LDU2 >= N.
108 *
109 * VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N)
110 * VT2**T contains a copy of the first K right singular vectors
111 * which will be used by DLASD3 in a matrix multiply (DGEMM) to
112 * solve for the new right singular vectors. VT2 is arranged into
113 * three blocks. The first block contains a row that corresponds
114 * to the special 0 diagonal element in SIGMA; the second block
115 * contains non-zeros only at and before NL +1; the third block
116 * contains non-zeros only at and after NL +2.
117 *
118 * LDVT2 (input) INTEGER
119 * The leading dimension of the array VT2. LDVT2 >= M.
120 *
121 * IDXP (workspace) INTEGER array dimension(N)
122 * This will contain the permutation used to place deflated
123 * values of D at the end of the array. On output IDXP(2:K)
124 * points to the nondeflated D-values and IDXP(K+1:N)
125 * points to the deflated singular values.
126 *
127 * IDX (workspace) INTEGER array dimension(N)
128 * This will contain the permutation used to sort the contents of
129 * D into ascending order.
130 *
131 * IDXC (output) INTEGER array dimension(N)
132 * This will contain the permutation used to arrange the columns
133 * of the deflated U matrix into three groups: the first group
134 * contains non-zero entries only at and above NL, the second
135 * contains non-zero entries only below NL+2, and the third is
136 * dense.
137 *
138 * IDXQ (input/output) INTEGER array dimension(N)
139 * This contains the permutation which separately sorts the two
140 * sub-problems in D into ascending order. Note that entries in
141 * the first hlaf of this permutation must first be moved one
142 * position backward; and entries in the second half
143 * must first have NL+1 added to their values.
144 *
145 * COLTYP (workspace/output) INTEGER array dimension(N)
146 * As workspace, this will contain a label which will indicate
147 * which of the following types a column in the U2 matrix or a
148 * row in the VT2 matrix is:
149 * 1 : non-zero in the upper half only
150 * 2 : non-zero in the lower half only
151 * 3 : dense
152 * 4 : deflated
153 *
154 * On exit, it is an array of dimension 4, with COLTYP(I) being
155 * the dimension of the I-th type columns.
156 *
157 * INFO (output) INTEGER
158 * = 0: successful exit.
159 * < 0: if INFO = -i, the i-th argument had an illegal value.
160 *
161 * Further Details
162 * ===============
163 *
164 * Based on contributions by
165 * Ming Gu and Huan Ren, Computer Science Division, University of
166 * California at Berkeley, USA
167 *
168 * =====================================================================
169 *
170 * .. Parameters ..
171 DOUBLE PRECISION ZERO, ONE, TWO, EIGHT
172 PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
173 $ EIGHT = 8.0D+0 )
174 * ..
175 * .. Local Arrays ..
176 INTEGER CTOT( 4 ), PSM( 4 )
177 * ..
178 * .. Local Scalars ..
179 INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
180 $ N, NLP1, NLP2
181 DOUBLE PRECISION C, EPS, HLFTOL, S, TAU, TOL, Z1
182 * ..
183 * .. External Functions ..
184 DOUBLE PRECISION DLAMCH, DLAPY2
185 EXTERNAL DLAMCH, DLAPY2
186 * ..
187 * .. External Subroutines ..
188 EXTERNAL DCOPY, DLACPY, DLAMRG, DLASET, DROT, XERBLA
189 * ..
190 * .. Intrinsic Functions ..
191 INTRINSIC ABS, MAX
192 * ..
193 * .. Executable Statements ..
194 *
195 * Test the input parameters.
196 *
197 INFO = 0
198 *
199 IF( NL.LT.1 ) THEN
200 INFO = -1
201 ELSE IF( NR.LT.1 ) THEN
202 INFO = -2
203 ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
204 INFO = -3
205 END IF
206 *
207 N = NL + NR + 1
208 M = N + SQRE
209 *
210 IF( LDU.LT.N ) THEN
211 INFO = -10
212 ELSE IF( LDVT.LT.M ) THEN
213 INFO = -12
214 ELSE IF( LDU2.LT.N ) THEN
215 INFO = -15
216 ELSE IF( LDVT2.LT.M ) THEN
217 INFO = -17
218 END IF
219 IF( INFO.NE.0 ) THEN
220 CALL XERBLA( 'DLASD2', -INFO )
221 RETURN
222 END IF
223 *
224 NLP1 = NL + 1
225 NLP2 = NL + 2
226 *
227 * Generate the first part of the vector Z; and move the singular
228 * values in the first part of D one position backward.
229 *
230 Z1 = ALPHA*VT( NLP1, NLP1 )
231 Z( 1 ) = Z1
232 DO 10 I = NL, 1, -1
233 Z( I+1 ) = ALPHA*VT( I, NLP1 )
234 D( I+1 ) = D( I )
235 IDXQ( I+1 ) = IDXQ( I ) + 1
236 10 CONTINUE
237 *
238 * Generate the second part of the vector Z.
239 *
240 DO 20 I = NLP2, M
241 Z( I ) = BETA*VT( I, NLP2 )
242 20 CONTINUE
243 *
244 * Initialize some reference arrays.
245 *
246 DO 30 I = 2, NLP1
247 COLTYP( I ) = 1
248 30 CONTINUE
249 DO 40 I = NLP2, N
250 COLTYP( I ) = 2
251 40 CONTINUE
252 *
253 * Sort the singular values into increasing order
254 *
255 DO 50 I = NLP2, N
256 IDXQ( I ) = IDXQ( I ) + NLP1
257 50 CONTINUE
258 *
259 * DSIGMA, IDXC, IDXC, and the first column of U2
260 * are used as storage space.
261 *
262 DO 60 I = 2, N
263 DSIGMA( I ) = D( IDXQ( I ) )
264 U2( I, 1 ) = Z( IDXQ( I ) )
265 IDXC( I ) = COLTYP( IDXQ( I ) )
266 60 CONTINUE
267 *
268 CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
269 *
270 DO 70 I = 2, N
271 IDXI = 1 + IDX( I )
272 D( I ) = DSIGMA( IDXI )
273 Z( I ) = U2( IDXI, 1 )
274 COLTYP( I ) = IDXC( IDXI )
275 70 CONTINUE
276 *
277 * Calculate the allowable deflation tolerance
278 *
279 EPS = DLAMCH( 'Epsilon' )
280 TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
281 TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
282 *
283 * There are 2 kinds of deflation -- first a value in the z-vector
284 * is small, second two (or more) singular values are very close
285 * together (their difference is small).
286 *
287 * If the value in the z-vector is small, we simply permute the
288 * array so that the corresponding singular value is moved to the
289 * end.
290 *
291 * If two values in the D-vector are close, we perform a two-sided
292 * rotation designed to make one of the corresponding z-vector
293 * entries zero, and then permute the array so that the deflated
294 * singular value is moved to the end.
295 *
296 * If there are multiple singular values then the problem deflates.
297 * Here the number of equal singular values are found. As each equal
298 * singular value is found, an elementary reflector is computed to
299 * rotate the corresponding singular subspace so that the
300 * corresponding components of Z are zero in this new basis.
301 *
302 K = 1
303 K2 = N + 1
304 DO 80 J = 2, N
305 IF( ABS( Z( J ) ).LE.TOL ) THEN
306 *
307 * Deflate due to small z component.
308 *
309 K2 = K2 - 1
310 IDXP( K2 ) = J
311 COLTYP( J ) = 4
312 IF( J.EQ.N )
313 $ GO TO 120
314 ELSE
315 JPREV = J
316 GO TO 90
317 END IF
318 80 CONTINUE
319 90 CONTINUE
320 J = JPREV
321 100 CONTINUE
322 J = J + 1
323 IF( J.GT.N )
324 $ GO TO 110
325 IF( ABS( Z( J ) ).LE.TOL ) THEN
326 *
327 * Deflate due to small z component.
328 *
329 K2 = K2 - 1
330 IDXP( K2 ) = J
331 COLTYP( J ) = 4
332 ELSE
333 *
334 * Check if singular values are close enough to allow deflation.
335 *
336 IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
337 *
338 * Deflation is possible.
339 *
340 S = Z( JPREV )
341 C = Z( J )
342 *
343 * Find sqrt(a**2+b**2) without overflow or
344 * destructive underflow.
345 *
346 TAU = DLAPY2( C, S )
347 C = C / TAU
348 S = -S / TAU
349 Z( J ) = TAU
350 Z( JPREV ) = ZERO
351 *
352 * Apply back the Givens rotation to the left and right
353 * singular vector matrices.
354 *
355 IDXJP = IDXQ( IDX( JPREV )+1 )
356 IDXJ = IDXQ( IDX( J )+1 )
357 IF( IDXJP.LE.NLP1 ) THEN
358 IDXJP = IDXJP - 1
359 END IF
360 IF( IDXJ.LE.NLP1 ) THEN
361 IDXJ = IDXJ - 1
362 END IF
363 CALL DROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )
364 CALL DROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,
365 $ S )
366 IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN
367 COLTYP( J ) = 3
368 END IF
369 COLTYP( JPREV ) = 4
370 K2 = K2 - 1
371 IDXP( K2 ) = JPREV
372 JPREV = J
373 ELSE
374 K = K + 1
375 U2( K, 1 ) = Z( JPREV )
376 DSIGMA( K ) = D( JPREV )
377 IDXP( K ) = JPREV
378 JPREV = J
379 END IF
380 END IF
381 GO TO 100
382 110 CONTINUE
383 *
384 * Record the last singular value.
385 *
386 K = K + 1
387 U2( K, 1 ) = Z( JPREV )
388 DSIGMA( K ) = D( JPREV )
389 IDXP( K ) = JPREV
390 *
391 120 CONTINUE
392 *
393 * Count up the total number of the various types of columns, then
394 * form a permutation which positions the four column types into
395 * four groups of uniform structure (although one or more of these
396 * groups may be empty).
397 *
398 DO 130 J = 1, 4
399 CTOT( J ) = 0
400 130 CONTINUE
401 DO 140 J = 2, N
402 CT = COLTYP( J )
403 CTOT( CT ) = CTOT( CT ) + 1
404 140 CONTINUE
405 *
406 * PSM(*) = Position in SubMatrix (of types 1 through 4)
407 *
408 PSM( 1 ) = 2
409 PSM( 2 ) = 2 + CTOT( 1 )
410 PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
411 PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
412 *
413 * Fill out the IDXC array so that the permutation which it induces
414 * will place all type-1 columns first, all type-2 columns next,
415 * then all type-3's, and finally all type-4's, starting from the
416 * second column. This applies similarly to the rows of VT.
417 *
418 DO 150 J = 2, N
419 JP = IDXP( J )
420 CT = COLTYP( JP )
421 IDXC( PSM( CT ) ) = J
422 PSM( CT ) = PSM( CT ) + 1
423 150 CONTINUE
424 *
425 * Sort the singular values and corresponding singular vectors into
426 * DSIGMA, U2, and VT2 respectively. The singular values/vectors
427 * which were not deflated go into the first K slots of DSIGMA, U2,
428 * and VT2 respectively, while those which were deflated go into the
429 * last N - K slots, except that the first column/row will be treated
430 * separately.
431 *
432 DO 160 J = 2, N
433 JP = IDXP( J )
434 DSIGMA( J ) = D( JP )
435 IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )
436 IF( IDXJ.LE.NLP1 ) THEN
437 IDXJ = IDXJ - 1
438 END IF
439 CALL DCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )
440 CALL DCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )
441 160 CONTINUE
442 *
443 * Determine DSIGMA(1), DSIGMA(2) and Z(1)
444 *
445 DSIGMA( 1 ) = ZERO
446 HLFTOL = TOL / TWO
447 IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
448 $ DSIGMA( 2 ) = HLFTOL
449 IF( M.GT.N ) THEN
450 Z( 1 ) = DLAPY2( Z1, Z( M ) )
451 IF( Z( 1 ).LE.TOL ) THEN
452 C = ONE
453 S = ZERO
454 Z( 1 ) = TOL
455 ELSE
456 C = Z1 / Z( 1 )
457 S = Z( M ) / Z( 1 )
458 END IF
459 ELSE
460 IF( ABS( Z1 ).LE.TOL ) THEN
461 Z( 1 ) = TOL
462 ELSE
463 Z( 1 ) = Z1
464 END IF
465 END IF
466 *
467 * Move the rest of the updating row to Z.
468 *
469 CALL DCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )
470 *
471 * Determine the first column of U2, the first row of VT2 and the
472 * last row of VT.
473 *
474 CALL DLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )
475 U2( NLP1, 1 ) = ONE
476 IF( M.GT.N ) THEN
477 DO 170 I = 1, NLP1
478 VT( M, I ) = -S*VT( NLP1, I )
479 VT2( 1, I ) = C*VT( NLP1, I )
480 170 CONTINUE
481 DO 180 I = NLP2, M
482 VT2( 1, I ) = S*VT( M, I )
483 VT( M, I ) = C*VT( M, I )
484 180 CONTINUE
485 ELSE
486 CALL DCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )
487 END IF
488 IF( M.GT.N ) THEN
489 CALL DCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )
490 END IF
491 *
492 * The deflated singular values and their corresponding vectors go
493 * into the back of D, U, and V respectively.
494 *
495 IF( N.GT.K ) THEN
496 CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
497 CALL DLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),
498 $ LDU )
499 CALL DLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),
500 $ LDVT )
501 END IF
502 *
503 * Copy CTOT into COLTYP for referencing in DLASD3.
504 *
505 DO 190 J = 1, 4
506 COLTYP( J ) = CTOT( J )
507 190 CONTINUE
508 *
509 RETURN
510 *
511 * End of DLASD2
512 *
513 END
2 $ LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
3 $ IDXC, IDXQ, COLTYP, INFO )
4 *
5 * -- LAPACK auxiliary routine (version 3.2) --
6 * -- LAPACK is a software package provided by Univ. of Tennessee, --
7 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
8 * November 2006
9 *
10 * .. Scalar Arguments ..
11 INTEGER INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
12 DOUBLE PRECISION ALPHA, BETA
13 * ..
14 * .. Array Arguments ..
15 INTEGER COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
16 $ IDXQ( * )
17 DOUBLE PRECISION D( * ), DSIGMA( * ), U( LDU, * ),
18 $ U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
19 $ Z( * )
20 * ..
21 *
22 * Purpose
23 * =======
24 *
25 * DLASD2 merges the two sets of singular values together into a single
26 * sorted set. Then it tries to deflate the size of the problem.
27 * There are two ways in which deflation can occur: when two or more
28 * singular values are close together or if there is a tiny entry in the
29 * Z vector. For each such occurrence the order of the related secular
30 * equation problem is reduced by one.
31 *
32 * DLASD2 is called from DLASD1.
33 *
34 * Arguments
35 * =========
36 *
37 * NL (input) INTEGER
38 * The row dimension of the upper block. NL >= 1.
39 *
40 * NR (input) INTEGER
41 * The row dimension of the lower block. NR >= 1.
42 *
43 * SQRE (input) INTEGER
44 * = 0: the lower block is an NR-by-NR square matrix.
45 * = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
46 *
47 * The bidiagonal matrix has N = NL + NR + 1 rows and
48 * M = N + SQRE >= N columns.
49 *
50 * K (output) INTEGER
51 * Contains the dimension of the non-deflated matrix,
52 * This is the order of the related secular equation. 1 <= K <=N.
53 *
54 * D (input/output) DOUBLE PRECISION array, dimension(N)
55 * On entry D contains the singular values of the two submatrices
56 * to be combined. On exit D contains the trailing (N-K) updated
57 * singular values (those which were deflated) sorted into
58 * increasing order.
59 *
60 * Z (output) DOUBLE PRECISION array, dimension(N)
61 * On exit Z contains the updating row vector in the secular
62 * equation.
63 *
64 * ALPHA (input) DOUBLE PRECISION
65 * Contains the diagonal element associated with the added row.
66 *
67 * BETA (input) DOUBLE PRECISION
68 * Contains the off-diagonal element associated with the added
69 * row.
70 *
71 * U (input/output) DOUBLE PRECISION array, dimension(LDU,N)
72 * On entry U contains the left singular vectors of two
73 * submatrices in the two square blocks with corners at (1,1),
74 * (NL, NL), and (NL+2, NL+2), (N,N).
75 * On exit U contains the trailing (N-K) updated left singular
76 * vectors (those which were deflated) in its last N-K columns.
77 *
78 * LDU (input) INTEGER
79 * The leading dimension of the array U. LDU >= N.
80 *
81 * VT (input/output) DOUBLE PRECISION array, dimension(LDVT,M)
82 * On entry VT**T contains the right singular vectors of two
83 * submatrices in the two square blocks with corners at (1,1),
84 * (NL+1, NL+1), and (NL+2, NL+2), (M,M).
85 * On exit VT**T contains the trailing (N-K) updated right singular
86 * vectors (those which were deflated) in its last N-K columns.
87 * In case SQRE =1, the last row of VT spans the right null
88 * space.
89 *
90 * LDVT (input) INTEGER
91 * The leading dimension of the array VT. LDVT >= M.
92 *
93 * DSIGMA (output) DOUBLE PRECISION array, dimension (N)
94 * Contains a copy of the diagonal elements (K-1 singular values
95 * and one zero) in the secular equation.
96 *
97 * U2 (output) DOUBLE PRECISION array, dimension(LDU2,N)
98 * Contains a copy of the first K-1 left singular vectors which
99 * will be used by DLASD3 in a matrix multiply (DGEMM) to solve
100 * for the new left singular vectors. U2 is arranged into four
101 * blocks. The first block contains a column with 1 at NL+1 and
102 * zero everywhere else; the second block contains non-zero
103 * entries only at and above NL; the third contains non-zero
104 * entries only below NL+1; and the fourth is dense.
105 *
106 * LDU2 (input) INTEGER
107 * The leading dimension of the array U2. LDU2 >= N.
108 *
109 * VT2 (output) DOUBLE PRECISION array, dimension(LDVT2,N)
110 * VT2**T contains a copy of the first K right singular vectors
111 * which will be used by DLASD3 in a matrix multiply (DGEMM) to
112 * solve for the new right singular vectors. VT2 is arranged into
113 * three blocks. The first block contains a row that corresponds
114 * to the special 0 diagonal element in SIGMA; the second block
115 * contains non-zeros only at and before NL +1; the third block
116 * contains non-zeros only at and after NL +2.
117 *
118 * LDVT2 (input) INTEGER
119 * The leading dimension of the array VT2. LDVT2 >= M.
120 *
121 * IDXP (workspace) INTEGER array dimension(N)
122 * This will contain the permutation used to place deflated
123 * values of D at the end of the array. On output IDXP(2:K)
124 * points to the nondeflated D-values and IDXP(K+1:N)
125 * points to the deflated singular values.
126 *
127 * IDX (workspace) INTEGER array dimension(N)
128 * This will contain the permutation used to sort the contents of
129 * D into ascending order.
130 *
131 * IDXC (output) INTEGER array dimension(N)
132 * This will contain the permutation used to arrange the columns
133 * of the deflated U matrix into three groups: the first group
134 * contains non-zero entries only at and above NL, the second
135 * contains non-zero entries only below NL+2, and the third is
136 * dense.
137 *
138 * IDXQ (input/output) INTEGER array dimension(N)
139 * This contains the permutation which separately sorts the two
140 * sub-problems in D into ascending order. Note that entries in
141 * the first hlaf of this permutation must first be moved one
142 * position backward; and entries in the second half
143 * must first have NL+1 added to their values.
144 *
145 * COLTYP (workspace/output) INTEGER array dimension(N)
146 * As workspace, this will contain a label which will indicate
147 * which of the following types a column in the U2 matrix or a
148 * row in the VT2 matrix is:
149 * 1 : non-zero in the upper half only
150 * 2 : non-zero in the lower half only
151 * 3 : dense
152 * 4 : deflated
153 *
154 * On exit, it is an array of dimension 4, with COLTYP(I) being
155 * the dimension of the I-th type columns.
156 *
157 * INFO (output) INTEGER
158 * = 0: successful exit.
159 * < 0: if INFO = -i, the i-th argument had an illegal value.
160 *
161 * Further Details
162 * ===============
163 *
164 * Based on contributions by
165 * Ming Gu and Huan Ren, Computer Science Division, University of
166 * California at Berkeley, USA
167 *
168 * =====================================================================
169 *
170 * .. Parameters ..
171 DOUBLE PRECISION ZERO, ONE, TWO, EIGHT
172 PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
173 $ EIGHT = 8.0D+0 )
174 * ..
175 * .. Local Arrays ..
176 INTEGER CTOT( 4 ), PSM( 4 )
177 * ..
178 * .. Local Scalars ..
179 INTEGER CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
180 $ N, NLP1, NLP2
181 DOUBLE PRECISION C, EPS, HLFTOL, S, TAU, TOL, Z1
182 * ..
183 * .. External Functions ..
184 DOUBLE PRECISION DLAMCH, DLAPY2
185 EXTERNAL DLAMCH, DLAPY2
186 * ..
187 * .. External Subroutines ..
188 EXTERNAL DCOPY, DLACPY, DLAMRG, DLASET, DROT, XERBLA
189 * ..
190 * .. Intrinsic Functions ..
191 INTRINSIC ABS, MAX
192 * ..
193 * .. Executable Statements ..
194 *
195 * Test the input parameters.
196 *
197 INFO = 0
198 *
199 IF( NL.LT.1 ) THEN
200 INFO = -1
201 ELSE IF( NR.LT.1 ) THEN
202 INFO = -2
203 ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
204 INFO = -3
205 END IF
206 *
207 N = NL + NR + 1
208 M = N + SQRE
209 *
210 IF( LDU.LT.N ) THEN
211 INFO = -10
212 ELSE IF( LDVT.LT.M ) THEN
213 INFO = -12
214 ELSE IF( LDU2.LT.N ) THEN
215 INFO = -15
216 ELSE IF( LDVT2.LT.M ) THEN
217 INFO = -17
218 END IF
219 IF( INFO.NE.0 ) THEN
220 CALL XERBLA( 'DLASD2', -INFO )
221 RETURN
222 END IF
223 *
224 NLP1 = NL + 1
225 NLP2 = NL + 2
226 *
227 * Generate the first part of the vector Z; and move the singular
228 * values in the first part of D one position backward.
229 *
230 Z1 = ALPHA*VT( NLP1, NLP1 )
231 Z( 1 ) = Z1
232 DO 10 I = NL, 1, -1
233 Z( I+1 ) = ALPHA*VT( I, NLP1 )
234 D( I+1 ) = D( I )
235 IDXQ( I+1 ) = IDXQ( I ) + 1
236 10 CONTINUE
237 *
238 * Generate the second part of the vector Z.
239 *
240 DO 20 I = NLP2, M
241 Z( I ) = BETA*VT( I, NLP2 )
242 20 CONTINUE
243 *
244 * Initialize some reference arrays.
245 *
246 DO 30 I = 2, NLP1
247 COLTYP( I ) = 1
248 30 CONTINUE
249 DO 40 I = NLP2, N
250 COLTYP( I ) = 2
251 40 CONTINUE
252 *
253 * Sort the singular values into increasing order
254 *
255 DO 50 I = NLP2, N
256 IDXQ( I ) = IDXQ( I ) + NLP1
257 50 CONTINUE
258 *
259 * DSIGMA, IDXC, IDXC, and the first column of U2
260 * are used as storage space.
261 *
262 DO 60 I = 2, N
263 DSIGMA( I ) = D( IDXQ( I ) )
264 U2( I, 1 ) = Z( IDXQ( I ) )
265 IDXC( I ) = COLTYP( IDXQ( I ) )
266 60 CONTINUE
267 *
268 CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
269 *
270 DO 70 I = 2, N
271 IDXI = 1 + IDX( I )
272 D( I ) = DSIGMA( IDXI )
273 Z( I ) = U2( IDXI, 1 )
274 COLTYP( I ) = IDXC( IDXI )
275 70 CONTINUE
276 *
277 * Calculate the allowable deflation tolerance
278 *
279 EPS = DLAMCH( 'Epsilon' )
280 TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
281 TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
282 *
283 * There are 2 kinds of deflation -- first a value in the z-vector
284 * is small, second two (or more) singular values are very close
285 * together (their difference is small).
286 *
287 * If the value in the z-vector is small, we simply permute the
288 * array so that the corresponding singular value is moved to the
289 * end.
290 *
291 * If two values in the D-vector are close, we perform a two-sided
292 * rotation designed to make one of the corresponding z-vector
293 * entries zero, and then permute the array so that the deflated
294 * singular value is moved to the end.
295 *
296 * If there are multiple singular values then the problem deflates.
297 * Here the number of equal singular values are found. As each equal
298 * singular value is found, an elementary reflector is computed to
299 * rotate the corresponding singular subspace so that the
300 * corresponding components of Z are zero in this new basis.
301 *
302 K = 1
303 K2 = N + 1
304 DO 80 J = 2, N
305 IF( ABS( Z( J ) ).LE.TOL ) THEN
306 *
307 * Deflate due to small z component.
308 *
309 K2 = K2 - 1
310 IDXP( K2 ) = J
311 COLTYP( J ) = 4
312 IF( J.EQ.N )
313 $ GO TO 120
314 ELSE
315 JPREV = J
316 GO TO 90
317 END IF
318 80 CONTINUE
319 90 CONTINUE
320 J = JPREV
321 100 CONTINUE
322 J = J + 1
323 IF( J.GT.N )
324 $ GO TO 110
325 IF( ABS( Z( J ) ).LE.TOL ) THEN
326 *
327 * Deflate due to small z component.
328 *
329 K2 = K2 - 1
330 IDXP( K2 ) = J
331 COLTYP( J ) = 4
332 ELSE
333 *
334 * Check if singular values are close enough to allow deflation.
335 *
336 IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
337 *
338 * Deflation is possible.
339 *
340 S = Z( JPREV )
341 C = Z( J )
342 *
343 * Find sqrt(a**2+b**2) without overflow or
344 * destructive underflow.
345 *
346 TAU = DLAPY2( C, S )
347 C = C / TAU
348 S = -S / TAU
349 Z( J ) = TAU
350 Z( JPREV ) = ZERO
351 *
352 * Apply back the Givens rotation to the left and right
353 * singular vector matrices.
354 *
355 IDXJP = IDXQ( IDX( JPREV )+1 )
356 IDXJ = IDXQ( IDX( J )+1 )
357 IF( IDXJP.LE.NLP1 ) THEN
358 IDXJP = IDXJP - 1
359 END IF
360 IF( IDXJ.LE.NLP1 ) THEN
361 IDXJ = IDXJ - 1
362 END IF
363 CALL DROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )
364 CALL DROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,
365 $ S )
366 IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN
367 COLTYP( J ) = 3
368 END IF
369 COLTYP( JPREV ) = 4
370 K2 = K2 - 1
371 IDXP( K2 ) = JPREV
372 JPREV = J
373 ELSE
374 K = K + 1
375 U2( K, 1 ) = Z( JPREV )
376 DSIGMA( K ) = D( JPREV )
377 IDXP( K ) = JPREV
378 JPREV = J
379 END IF
380 END IF
381 GO TO 100
382 110 CONTINUE
383 *
384 * Record the last singular value.
385 *
386 K = K + 1
387 U2( K, 1 ) = Z( JPREV )
388 DSIGMA( K ) = D( JPREV )
389 IDXP( K ) = JPREV
390 *
391 120 CONTINUE
392 *
393 * Count up the total number of the various types of columns, then
394 * form a permutation which positions the four column types into
395 * four groups of uniform structure (although one or more of these
396 * groups may be empty).
397 *
398 DO 130 J = 1, 4
399 CTOT( J ) = 0
400 130 CONTINUE
401 DO 140 J = 2, N
402 CT = COLTYP( J )
403 CTOT( CT ) = CTOT( CT ) + 1
404 140 CONTINUE
405 *
406 * PSM(*) = Position in SubMatrix (of types 1 through 4)
407 *
408 PSM( 1 ) = 2
409 PSM( 2 ) = 2 + CTOT( 1 )
410 PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
411 PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
412 *
413 * Fill out the IDXC array so that the permutation which it induces
414 * will place all type-1 columns first, all type-2 columns next,
415 * then all type-3's, and finally all type-4's, starting from the
416 * second column. This applies similarly to the rows of VT.
417 *
418 DO 150 J = 2, N
419 JP = IDXP( J )
420 CT = COLTYP( JP )
421 IDXC( PSM( CT ) ) = J
422 PSM( CT ) = PSM( CT ) + 1
423 150 CONTINUE
424 *
425 * Sort the singular values and corresponding singular vectors into
426 * DSIGMA, U2, and VT2 respectively. The singular values/vectors
427 * which were not deflated go into the first K slots of DSIGMA, U2,
428 * and VT2 respectively, while those which were deflated go into the
429 * last N - K slots, except that the first column/row will be treated
430 * separately.
431 *
432 DO 160 J = 2, N
433 JP = IDXP( J )
434 DSIGMA( J ) = D( JP )
435 IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )
436 IF( IDXJ.LE.NLP1 ) THEN
437 IDXJ = IDXJ - 1
438 END IF
439 CALL DCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )
440 CALL DCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )
441 160 CONTINUE
442 *
443 * Determine DSIGMA(1), DSIGMA(2) and Z(1)
444 *
445 DSIGMA( 1 ) = ZERO
446 HLFTOL = TOL / TWO
447 IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
448 $ DSIGMA( 2 ) = HLFTOL
449 IF( M.GT.N ) THEN
450 Z( 1 ) = DLAPY2( Z1, Z( M ) )
451 IF( Z( 1 ).LE.TOL ) THEN
452 C = ONE
453 S = ZERO
454 Z( 1 ) = TOL
455 ELSE
456 C = Z1 / Z( 1 )
457 S = Z( M ) / Z( 1 )
458 END IF
459 ELSE
460 IF( ABS( Z1 ).LE.TOL ) THEN
461 Z( 1 ) = TOL
462 ELSE
463 Z( 1 ) = Z1
464 END IF
465 END IF
466 *
467 * Move the rest of the updating row to Z.
468 *
469 CALL DCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )
470 *
471 * Determine the first column of U2, the first row of VT2 and the
472 * last row of VT.
473 *
474 CALL DLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )
475 U2( NLP1, 1 ) = ONE
476 IF( M.GT.N ) THEN
477 DO 170 I = 1, NLP1
478 VT( M, I ) = -S*VT( NLP1, I )
479 VT2( 1, I ) = C*VT( NLP1, I )
480 170 CONTINUE
481 DO 180 I = NLP2, M
482 VT2( 1, I ) = S*VT( M, I )
483 VT( M, I ) = C*VT( M, I )
484 180 CONTINUE
485 ELSE
486 CALL DCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )
487 END IF
488 IF( M.GT.N ) THEN
489 CALL DCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )
490 END IF
491 *
492 * The deflated singular values and their corresponding vectors go
493 * into the back of D, U, and V respectively.
494 *
495 IF( N.GT.K ) THEN
496 CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
497 CALL DLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),
498 $ LDU )
499 CALL DLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),
500 $ LDVT )
501 END IF
502 *
503 * Copy CTOT into COLTYP for referencing in DLASD3.
504 *
505 DO 190 J = 1, 4
506 COLTYP( J ) = CTOT( J )
507 190 CONTINUE
508 *
509 RETURN
510 *
511 * End of DLASD2
512 *
513 END