1       SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
  2 *
  3 *  -- LAPACK routine (version 3.2) --
  4 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  5 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  6 *     November 2006
  7 *
  8 *     .. Scalar Arguments ..
  9       INTEGER            INFO, KL, KU, LDAB, M, N
 10 *     ..
 11 *     .. Array Arguments ..
 12       INTEGER            IPIV( * )
 13       DOUBLE PRECISION   AB( LDAB, * )
 14 *     ..
 15 *
 16 *  Purpose
 17 *  =======
 18 *
 19 *  DGBTRF computes an LU factorization of a real m-by-n band matrix A
 20 *  using partial pivoting with row interchanges.
 21 *
 22 *  This is the blocked version of the algorithm, calling Level 3 BLAS.
 23 *
 24 *  Arguments
 25 *  =========
 26 *
 27 *  M       (input) INTEGER
 28 *          The number of rows of the matrix A.  M >= 0.
 29 *
 30 *  N       (input) INTEGER
 31 *          The number of columns of the matrix A.  N >= 0.
 32 *
 33 *  KL      (input) INTEGER
 34 *          The number of subdiagonals within the band of A.  KL >= 0.
 35 *
 36 *  KU      (input) INTEGER
 37 *          The number of superdiagonals within the band of A.  KU >= 0.
 38 *
 39 *  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)
 40 *          On entry, the matrix A in band storage, in rows KL+1 to
 41 *          2*KL+KU+1; rows 1 to KL of the array need not be set.
 42 *          The j-th column of A is stored in the j-th column of the
 43 *          array AB as follows:
 44 *          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
 45 *
 46 *          On exit, details of the factorization: U is stored as an
 47 *          upper triangular band matrix with KL+KU superdiagonals in
 48 *          rows 1 to KL+KU+1, and the multipliers used during the
 49 *          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
 50 *          See below for further details.
 51 *
 52 *  LDAB    (input) INTEGER
 53 *          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
 54 *
 55 *  IPIV    (output) INTEGER array, dimension (min(M,N))
 56 *          The pivot indices; for 1 <= i <= min(M,N), row i of the
 57 *          matrix was interchanged with row IPIV(i).
 58 *
 59 *  INFO    (output) INTEGER
 60 *          = 0: successful exit
 61 *          < 0: if INFO = -i, the i-th argument had an illegal value
 62 *          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
 63 *               has been completed, but the factor U is exactly
 64 *               singular, and division by zero will occur if it is used
 65 *               to solve a system of equations.
 66 *
 67 *  Further Details
 68 *  ===============
 69 *
 70 *  The band storage scheme is illustrated by the following example, when
 71 *  M = N = 6, KL = 2, KU = 1:
 72 *
 73 *  On entry:                       On exit:
 74 *
 75 *      *    *    *    +    +    +       *    *    *   u14  u25  u36
 76 *      *    *    +    +    +    +       *    *   u13  u24  u35  u46
 77 *      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
 78 *     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
 79 *     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
 80 *     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
 81 *
 82 *  Array elements marked * are not used by the routine; elements marked
 83 *  + need not be set on entry, but are required by the routine to store
 84 *  elements of U because of fill-in resulting from the row interchanges.
 85 *
 86 *  =====================================================================
 87 *
 88 *     .. Parameters ..
 89       DOUBLE PRECISION   ONE, ZERO
 90       PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )
 91       INTEGER            NBMAX, LDWORK
 92       PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
 93 *     ..
 94 *     .. Local Scalars ..
 95       INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
 96      $                   JU, K2, KM, KV, NB, NW
 97       DOUBLE PRECISION   TEMP
 98 *     ..
 99 *     .. Local Arrays ..
100       DOUBLE PRECISION   WORK13( LDWORK, NBMAX ),
101      $                   WORK31( LDWORK, NBMAX )
102 *     ..
103 *     .. External Functions ..
104       INTEGER            IDAMAX, ILAENV
105       EXTERNAL           IDAMAX, ILAENV
106 *     ..
107 *     .. External Subroutines ..
108       EXTERNAL           DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL,
109      $                   DSWAP, DTRSM, XERBLA
110 *     ..
111 *     .. Intrinsic Functions ..
112       INTRINSIC          MAXMIN
113 *     ..
114 *     .. Executable Statements ..
115 *
116 *     KV is the number of superdiagonals in the factor U, allowing for
117 *     fill-in
118 *
119       KV = KU + KL
120 *
121 *     Test the input parameters.
122 *
123       INFO = 0
124       IF( M.LT.0 ) THEN
125          INFO = -1
126       ELSE IF( N.LT.0 ) THEN
127          INFO = -2
128       ELSE IF( KL.LT.0 ) THEN
129          INFO = -3
130       ELSE IF( KU.LT.0 ) THEN
131          INFO = -4
132       ELSE IF( LDAB.LT.KL+KV+1 ) THEN
133          INFO = -6
134       END IF
135       IF( INFO.NE.0 ) THEN
136          CALL XERBLA( 'DGBTRF'-INFO )
137          RETURN
138       END IF
139 *
140 *     Quick return if possible
141 *
142       IF( M.EQ.0 .OR. N.EQ.0 )
143      $   RETURN
144 *
145 *     Determine the block size for this environment
146 *
147       NB = ILAENV( 1'DGBTRF'' ', M, N, KL, KU )
148 *
149 *     The block size must not exceed the limit set by the size of the
150 *     local arrays WORK13 and WORK31.
151 *
152       NB = MIN( NB, NBMAX )
153 *
154       IF( NB.LE.1 .OR. NB.GT.KL ) THEN
155 *
156 *        Use unblocked code
157 *
158          CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
159       ELSE
160 *
161 *        Use blocked code
162 *
163 *        Zero the superdiagonal elements of the work array WORK13
164 *
165          DO 20 J = 1, NB
166             DO 10 I = 1, J - 1
167                WORK13( I, J ) = ZERO
168    10       CONTINUE
169    20    CONTINUE
170 *
171 *        Zero the subdiagonal elements of the work array WORK31
172 *
173          DO 40 J = 1, NB
174             DO 30 I = J + 1, NB
175                WORK31( I, J ) = ZERO
176    30       CONTINUE
177    40    CONTINUE
178 *
179 *        Gaussian elimination with partial pivoting
180 *
181 *        Set fill-in elements in columns KU+2 to KV to zero
182 *
183          DO 60 J = KU + 2MIN( KV, N )
184             DO 50 I = KV - J + 2, KL
185                AB( I, J ) = ZERO
186    50       CONTINUE
187    60    CONTINUE
188 *
189 *        JU is the index of the last column affected by the current
190 *        stage of the factorization
191 *
192          JU = 1
193 *
194          DO 180 J = 1MIN( M, N ), NB
195             JB = MIN( NB, MIN( M, N )-J+1 )
196 *
197 *           The active part of the matrix is partitioned
198 *
199 *              A11   A12   A13
200 *              A21   A22   A23
201 *              A31   A32   A33
202 *
203 *           Here A11, A21 and A31 denote the current block of JB columns
204 *           which is about to be factorized. The number of rows in the
205 *           partitioning are JB, I2, I3 respectively, and the numbers
206 *           of columns are JB, J2, J3. The superdiagonal elements of A13
207 *           and the subdiagonal elements of A31 lie outside the band.
208 *
209             I2 = MIN( KL-JB, M-J-JB+1 )
210             I3 = MIN( JB, M-J-KL+1 )
211 *
212 *           J2 and J3 are computed after JU has been updated.
213 *
214 *           Factorize the current block of JB columns
215 *
216             DO 80 JJ = J, J + JB - 1
217 *
218 *              Set fill-in elements in column JJ+KV to zero
219 *
220                IF( JJ+KV.LE.N ) THEN
221                   DO 70 I = 1, KL
222                      AB( I, JJ+KV ) = ZERO
223    70             CONTINUE
224                END IF
225 *
226 *              Find pivot and test for singularity. KM is the number of
227 *              subdiagonal elements in the current column.
228 *
229                KM = MIN( KL, M-JJ )
230                JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 )
231                IPIV( JJ ) = JP + JJ - J
232                IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
233                   JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
234                   IF( JP.NE.1 ) THEN
235 *
236 *                    Apply interchange to columns J to J+JB-1
237 *
238                      IF( JP+JJ-1.LT.J+KL ) THEN
239 *
240                         CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
241      $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
242                      ELSE
243 *
244 *                       The interchange affects columns J to JJ-1 of A31
245 *                       which are stored in the work array WORK31
246 *
247                         CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
248      $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
249                         CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
250      $                              AB( KV+JP, JJ ), LDAB-1 )
251                      END IF
252                   END IF
253 *
254 *                 Compute multipliers
255 *
256                   CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
257      $                        1 )
258 *
259 *                 Update trailing submatrix within the band and within
260 *                 the current block. JM is the index of the last column
261 *                 which needs to be updated.
262 *
263                   JM = MIN( JU, J+JB-1 )
264                   IF( JM.GT.JJ )
265      $               CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
266      $                          AB( KV, JJ+1 ), LDAB-1,
267      $                          AB( KV+1, JJ+1 ), LDAB-1 )
268                ELSE
269 *
270 *                 If pivot is zero, set INFO to the index of the pivot
271 *                 unless a zero pivot has already been found.
272 *
273                   IF( INFO.EQ.0 )
274      $               INFO = JJ
275                END IF
276 *
277 *              Copy current column of A31 into the work array WORK31
278 *
279                NW = MIN( JJ-J+1, I3 )
280                IF( NW.GT.0 )
281      $            CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
282      $                        WORK31( 1, JJ-J+1 ), 1 )
283    80       CONTINUE
284             IF( J+JB.LE.N ) THEN
285 *
286 *              Apply the row interchanges to the other blocks.
287 *
288                J2 = MIN( JU-J+1, KV ) - JB
289                J3 = MAX0, JU-J-KV+1 )
290 *
291 *              Use DLASWP to apply the row interchanges to A12, A22, and
292 *              A32.
293 *
294                CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-11, JB,
295      $                      IPIV( J ), 1 )
296 *
297 *              Adjust the pivot indices.
298 *
299                DO 90 I = J, J + JB - 1
300                   IPIV( I ) = IPIV( I ) + J - 1
301    90          CONTINUE
302 *
303 *              Apply the row interchanges to A13, A23, and A33
304 *              columnwise.
305 *
306                K2 = J - 1 + JB + J2
307                DO 110 I = 1, J3
308                   JJ = K2 + I
309                   DO 100 II = J + I - 1, J + JB - 1
310                      IP = IPIV( II )
311                      IF( IP.NE.II ) THEN
312                         TEMP = AB( KV+1+II-JJ, JJ )
313                         AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
314                         AB( KV+1+IP-JJ, JJ ) = TEMP
315                      END IF
316   100             CONTINUE
317   110          CONTINUE
318 *
319 *              Update the relevant part of the trailing submatrix
320 *
321                IF( J2.GT.0 ) THEN
322 *
323 *                 Update A12
324 *
325                   CALL DTRSM( 'Left''Lower''No transpose''Unit',
326      $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
327      $                        AB( KV+1-JB, J+JB ), LDAB-1 )
328 *
329                   IF( I2.GT.0 ) THEN
330 *
331 *                    Update A22
332 *
333                      CALL DGEMM( 'No transpose''No transpose', I2, J2,
334      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
335      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
336      $                           AB( KV+1, J+JB ), LDAB-1 )
337                   END IF
338 *
339                   IF( I3.GT.0 ) THEN
340 *
341 *                    Update A32
342 *
343                      CALL DGEMM( 'No transpose''No transpose', I3, J2,
344      $                           JB, -ONE, WORK31, LDWORK,
345      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
346      $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
347                   END IF
348                END IF
349 *
350                IF( J3.GT.0 ) THEN
351 *
352 *                 Copy the lower triangle of A13 into the work array
353 *                 WORK13
354 *
355                   DO 130 JJ = 1, J3
356                      DO 120 II = JJ, JB
357                         WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
358   120                CONTINUE
359   130             CONTINUE
360 *
361 *                 Update A13 in the work array
362 *
363                   CALL DTRSM( 'Left''Lower''No transpose''Unit',
364      $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
365      $                        WORK13, LDWORK )
366 *
367                   IF( I2.GT.0 ) THEN
368 *
369 *                    Update A23
370 *
371                      CALL DGEMM( 'No transpose''No transpose', I2, J3,
372      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
373      $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
374      $                           LDAB-1 )
375                   END IF
376 *
377                   IF( I3.GT.0 ) THEN
378 *
379 *                    Update A33
380 *
381                      CALL DGEMM( 'No transpose''No transpose', I3, J3,
382      $                           JB, -ONE, WORK31, LDWORK, WORK13,
383      $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
384                   END IF
385 *
386 *                 Copy the lower triangle of A13 back into place
387 *
388                   DO 150 JJ = 1, J3
389                      DO 140 II = JJ, JB
390                         AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
391   140                CONTINUE
392   150             CONTINUE
393                END IF
394             ELSE
395 *
396 *              Adjust the pivot indices.
397 *
398                DO 160 I = J, J + JB - 1
399                   IPIV( I ) = IPIV( I ) + J - 1
400   160          CONTINUE
401             END IF
402 *
403 *           Partially undo the interchanges in the current block to
404 *           restore the upper triangular form of A31 and copy the upper
405 *           triangle of A31 back into place
406 *
407             DO 170 JJ = J + JB - 1, J, -1
408                JP = IPIV( JJ ) - JJ + 1
409                IF( JP.NE.1 ) THEN
410 *
411 *                 Apply interchange to columns J to JJ-1
412 *
413                   IF( JP+JJ-1.LT.J+KL ) THEN
414 *
415 *                    The interchange does not affect A31
416 *
417                      CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
418      $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
419                   ELSE
420 *
421 *                    The interchange does affect A31
422 *
423                      CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
424      $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
425                   END IF
426                END IF
427 *
428 *              Copy the current column of A31 back into place
429 *
430                NW = MIN( I3, JJ-J+1 )
431                IF( NW.GT.0 )
432      $            CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
433      $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
434   170       CONTINUE
435   180    CONTINUE
436       END IF
437 *
438       RETURN
439 *
440 *     End of DGBTRF
441 *
442       END