1       SUBROUTINE ZGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
  2 *
  3 *  -- LAPACK routine (version 3.2) --
  4 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  5 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  6 *     November 2006
  7 *
  8 *     .. Scalar Arguments ..
  9       INTEGER            INFO, KL, KU, LDAB, M, N
 10 *     ..
 11 *     .. Array Arguments ..
 12       INTEGER            IPIV( * )
 13       COMPLEX*16         AB( LDAB, * )
 14 *     ..
 15 *
 16 *  Purpose
 17 *  =======
 18 *
 19 *  ZGBTRF computes an LU factorization of a complex m-by-n band matrix A
 20 *  using partial pivoting with row interchanges.
 21 *
 22 *  This is the blocked version of the algorithm, calling Level 3 BLAS.
 23 *
 24 *  Arguments
 25 *  =========
 26 *
 27 *  M       (input) INTEGER
 28 *          The number of rows of the matrix A.  M >= 0.
 29 *
 30 *  N       (input) INTEGER
 31 *          The number of columns of the matrix A.  N >= 0.
 32 *
 33 *  KL      (input) INTEGER
 34 *          The number of subdiagonals within the band of A.  KL >= 0.
 35 *
 36 *  KU      (input) INTEGER
 37 *          The number of superdiagonals within the band of A.  KU >= 0.
 38 *
 39 *  AB      (input/output) COMPLEX*16 array, dimension (LDAB,N)
 40 *          On entry, the matrix A in band storage, in rows KL+1 to
 41 *          2*KL+KU+1; rows 1 to KL of the array need not be set.
 42 *          The j-th column of A is stored in the j-th column of the
 43 *          array AB as follows:
 44 *          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
 45 *
 46 *          On exit, details of the factorization: U is stored as an
 47 *          upper triangular band matrix with KL+KU superdiagonals in
 48 *          rows 1 to KL+KU+1, and the multipliers used during the
 49 *          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
 50 *          See below for further details.
 51 *
 52 *  LDAB    (input) INTEGER
 53 *          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.
 54 *
 55 *  IPIV    (output) INTEGER array, dimension (min(M,N))
 56 *          The pivot indices; for 1 <= i <= min(M,N), row i of the
 57 *          matrix was interchanged with row IPIV(i).
 58 *
 59 *  INFO    (output) INTEGER
 60 *          = 0: successful exit
 61 *          < 0: if INFO = -i, the i-th argument had an illegal value
 62 *          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
 63 *               has been completed, but the factor U is exactly
 64 *               singular, and division by zero will occur if it is used
 65 *               to solve a system of equations.
 66 *
 67 *  Further Details
 68 *  ===============
 69 *
 70 *  The band storage scheme is illustrated by the following example, when
 71 *  M = N = 6, KL = 2, KU = 1:
 72 *
 73 *  On entry:                       On exit:
 74 *
 75 *      *    *    *    +    +    +       *    *    *   u14  u25  u36
 76 *      *    *    +    +    +    +       *    *   u13  u24  u35  u46
 77 *      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56
 78 *     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66
 79 *     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *
 80 *     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *
 81 *
 82 *  Array elements marked * are not used by the routine; elements marked
 83 *  + need not be set on entry, but are required by the routine to store
 84 *  elements of U because of fill-in resulting from the row interchanges.
 85 *
 86 *  =====================================================================
 87 *
 88 *     .. Parameters ..
 89       COMPLEX*16         ONE, ZERO
 90       PARAMETER          ( ONE = ( 1.0D+00.0D+0 ),
 91      $                   ZERO = ( 0.0D+00.0D+0 ) )
 92       INTEGER            NBMAX, LDWORK
 93       PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )
 94 *     ..
 95 *     .. Local Scalars ..
 96       INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
 97      $                   JU, K2, KM, KV, NB, NW
 98       COMPLEX*16         TEMP
 99 *     ..
100 *     .. Local Arrays ..
101       COMPLEX*16         WORK13( LDWORK, NBMAX ),
102      $                   WORK31( LDWORK, NBMAX )
103 *     ..
104 *     .. External Functions ..
105       INTEGER            ILAENV, IZAMAX
106       EXTERNAL           ILAENV, IZAMAX
107 *     ..
108 *     .. External Subroutines ..
109       EXTERNAL           XERBLA, ZCOPY, ZGBTF2, ZGEMM, ZGERU, ZLASWP,
110      $                   ZSCAL, ZSWAP, ZTRSM
111 *     ..
112 *     .. Intrinsic Functions ..
113       INTRINSIC          MAXMIN
114 *     ..
115 *     .. Executable Statements ..
116 *
117 *     KV is the number of superdiagonals in the factor U, allowing for
118 *     fill-in
119 *
120       KV = KU + KL
121 *
122 *     Test the input parameters.
123 *
124       INFO = 0
125       IF( M.LT.0 ) THEN
126          INFO = -1
127       ELSE IF( N.LT.0 ) THEN
128          INFO = -2
129       ELSE IF( KL.LT.0 ) THEN
130          INFO = -3
131       ELSE IF( KU.LT.0 ) THEN
132          INFO = -4
133       ELSE IF( LDAB.LT.KL+KV+1 ) THEN
134          INFO = -6
135       END IF
136       IF( INFO.NE.0 ) THEN
137          CALL XERBLA( 'ZGBTRF'-INFO )
138          RETURN
139       END IF
140 *
141 *     Quick return if possible
142 *
143       IF( M.EQ.0 .OR. N.EQ.0 )
144      $   RETURN
145 *
146 *     Determine the block size for this environment
147 *
148       NB = ILAENV( 1'ZGBTRF'' ', M, N, KL, KU )
149 *
150 *     The block size must not exceed the limit set by the size of the
151 *     local arrays WORK13 and WORK31.
152 *
153       NB = MIN( NB, NBMAX )
154 *
155       IF( NB.LE.1 .OR. NB.GT.KL ) THEN
156 *
157 *        Use unblocked code
158 *
159          CALL ZGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
160       ELSE
161 *
162 *        Use blocked code
163 *
164 *        Zero the superdiagonal elements of the work array WORK13
165 *
166          DO 20 J = 1, NB
167             DO 10 I = 1, J - 1
168                WORK13( I, J ) = ZERO
169    10       CONTINUE
170    20    CONTINUE
171 *
172 *        Zero the subdiagonal elements of the work array WORK31
173 *
174          DO 40 J = 1, NB
175             DO 30 I = J + 1, NB
176                WORK31( I, J ) = ZERO
177    30       CONTINUE
178    40    CONTINUE
179 *
180 *        Gaussian elimination with partial pivoting
181 *
182 *        Set fill-in elements in columns KU+2 to KV to zero
183 *
184          DO 60 J = KU + 2MIN( KV, N )
185             DO 50 I = KV - J + 2, KL
186                AB( I, J ) = ZERO
187    50       CONTINUE
188    60    CONTINUE
189 *
190 *        JU is the index of the last column affected by the current
191 *        stage of the factorization
192 *
193          JU = 1
194 *
195          DO 180 J = 1MIN( M, N ), NB
196             JB = MIN( NB, MIN( M, N )-J+1 )
197 *
198 *           The active part of the matrix is partitioned
199 *
200 *              A11   A12   A13
201 *              A21   A22   A23
202 *              A31   A32   A33
203 *
204 *           Here A11, A21 and A31 denote the current block of JB columns
205 *           which is about to be factorized. The number of rows in the
206 *           partitioning are JB, I2, I3 respectively, and the numbers
207 *           of columns are JB, J2, J3. The superdiagonal elements of A13
208 *           and the subdiagonal elements of A31 lie outside the band.
209 *
210             I2 = MIN( KL-JB, M-J-JB+1 )
211             I3 = MIN( JB, M-J-KL+1 )
212 *
213 *           J2 and J3 are computed after JU has been updated.
214 *
215 *           Factorize the current block of JB columns
216 *
217             DO 80 JJ = J, J + JB - 1
218 *
219 *              Set fill-in elements in column JJ+KV to zero
220 *
221                IF( JJ+KV.LE.N ) THEN
222                   DO 70 I = 1, KL
223                      AB( I, JJ+KV ) = ZERO
224    70             CONTINUE
225                END IF
226 *
227 *              Find pivot and test for singularity. KM is the number of
228 *              subdiagonal elements in the current column.
229 *
230                KM = MIN( KL, M-JJ )
231                JP = IZAMAX( KM+1, AB( KV+1, JJ ), 1 )
232                IPIV( JJ ) = JP + JJ - J
233                IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
234                   JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
235                   IF( JP.NE.1 ) THEN
236 *
237 *                    Apply interchange to columns J to J+JB-1
238 *
239                      IF( JP+JJ-1.LT.J+KL ) THEN
240 *
241                         CALL ZSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
242      $                              AB( KV+JP+JJ-J, J ), LDAB-1 )
243                      ELSE
244 *
245 *                       The interchange affects columns J to JJ-1 of A31
246 *                       which are stored in the work array WORK31
247 *
248                         CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
249      $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )
250                         CALL ZSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
251      $                              AB( KV+JP, JJ ), LDAB-1 )
252                      END IF
253                   END IF
254 *
255 *                 Compute multipliers
256 *
257                   CALL ZSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
258      $                        1 )
259 *
260 *                 Update trailing submatrix within the band and within
261 *                 the current block. JM is the index of the last column
262 *                 which needs to be updated.
263 *
264                   JM = MIN( JU, J+JB-1 )
265                   IF( JM.GT.JJ )
266      $               CALL ZGERU( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
267      $                           AB( KV, JJ+1 ), LDAB-1,
268      $                           AB( KV+1, JJ+1 ), LDAB-1 )
269                ELSE
270 *
271 *                 If pivot is zero, set INFO to the index of the pivot
272 *                 unless a zero pivot has already been found.
273 *
274                   IF( INFO.EQ.0 )
275      $               INFO = JJ
276                END IF
277 *
278 *              Copy current column of A31 into the work array WORK31
279 *
280                NW = MIN( JJ-J+1, I3 )
281                IF( NW.GT.0 )
282      $            CALL ZCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
283      $                        WORK31( 1, JJ-J+1 ), 1 )
284    80       CONTINUE
285             IF( J+JB.LE.N ) THEN
286 *
287 *              Apply the row interchanges to the other blocks.
288 *
289                J2 = MIN( JU-J+1, KV ) - JB
290                J3 = MAX0, JU-J-KV+1 )
291 *
292 *              Use ZLASWP to apply the row interchanges to A12, A22, and
293 *              A32.
294 *
295                CALL ZLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-11, JB,
296      $                      IPIV( J ), 1 )
297 *
298 *              Adjust the pivot indices.
299 *
300                DO 90 I = J, J + JB - 1
301                   IPIV( I ) = IPIV( I ) + J - 1
302    90          CONTINUE
303 *
304 *              Apply the row interchanges to A13, A23, and A33
305 *              columnwise.
306 *
307                K2 = J - 1 + JB + J2
308                DO 110 I = 1, J3
309                   JJ = K2 + I
310                   DO 100 II = J + I - 1, J + JB - 1
311                      IP = IPIV( II )
312                      IF( IP.NE.II ) THEN
313                         TEMP = AB( KV+1+II-JJ, JJ )
314                         AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
315                         AB( KV+1+IP-JJ, JJ ) = TEMP
316                      END IF
317   100             CONTINUE
318   110          CONTINUE
319 *
320 *              Update the relevant part of the trailing submatrix
321 *
322                IF( J2.GT.0 ) THEN
323 *
324 *                 Update A12
325 *
326                   CALL ZTRSM( 'Left''Lower''No transpose''Unit',
327      $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,
328      $                        AB( KV+1-JB, J+JB ), LDAB-1 )
329 *
330                   IF( I2.GT.0 ) THEN
331 *
332 *                    Update A22
333 *
334                      CALL ZGEMM( 'No transpose''No transpose', I2, J2,
335      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
336      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
337      $                           AB( KV+1, J+JB ), LDAB-1 )
338                   END IF
339 *
340                   IF( I3.GT.0 ) THEN
341 *
342 *                    Update A32
343 *
344                      CALL ZGEMM( 'No transpose''No transpose', I3, J2,
345      $                           JB, -ONE, WORK31, LDWORK,
346      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,
347      $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )
348                   END IF
349                END IF
350 *
351                IF( J3.GT.0 ) THEN
352 *
353 *                 Copy the lower triangle of A13 into the work array
354 *                 WORK13
355 *
356                   DO 130 JJ = 1, J3
357                      DO 120 II = JJ, JB
358                         WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
359   120                CONTINUE
360   130             CONTINUE
361 *
362 *                 Update A13 in the work array
363 *
364                   CALL ZTRSM( 'Left''Lower''No transpose''Unit',
365      $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,
366      $                        WORK13, LDWORK )
367 *
368                   IF( I2.GT.0 ) THEN
369 *
370 *                    Update A23
371 *
372                      CALL ZGEMM( 'No transpose''No transpose', I2, J3,
373      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
374      $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
375      $                           LDAB-1 )
376                   END IF
377 *
378                   IF( I3.GT.0 ) THEN
379 *
380 *                    Update A33
381 *
382                      CALL ZGEMM( 'No transpose''No transpose', I3, J3,
383      $                           JB, -ONE, WORK31, LDWORK, WORK13,
384      $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
385                   END IF
386 *
387 *                 Copy the lower triangle of A13 back into place
388 *
389                   DO 150 JJ = 1, J3
390                      DO 140 II = JJ, JB
391                         AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
392   140                CONTINUE
393   150             CONTINUE
394                END IF
395             ELSE
396 *
397 *              Adjust the pivot indices.
398 *
399                DO 160 I = J, J + JB - 1
400                   IPIV( I ) = IPIV( I ) + J - 1
401   160          CONTINUE
402             END IF
403 *
404 *           Partially undo the interchanges in the current block to
405 *           restore the upper triangular form of A31 and copy the upper
406 *           triangle of A31 back into place
407 *
408             DO 170 JJ = J + JB - 1, J, -1
409                JP = IPIV( JJ ) - JJ + 1
410                IF( JP.NE.1 ) THEN
411 *
412 *                 Apply interchange to columns J to JJ-1
413 *
414                   IF( JP+JJ-1.LT.J+KL ) THEN
415 *
416 *                    The interchange does not affect A31
417 *
418                      CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
419      $                           AB( KV+JP+JJ-J, J ), LDAB-1 )
420                   ELSE
421 *
422 *                    The interchange does affect A31
423 *
424                      CALL ZSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
425      $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )
426                   END IF
427                END IF
428 *
429 *              Copy the current column of A31 back into place
430 *
431                NW = MIN( I3, JJ-J+1 )
432                IF( NW.GT.0 )
433      $            CALL ZCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
434      $                        AB( KV+KL+1-JJ+J, JJ ), 1 )
435   170       CONTINUE
436   180    CONTINUE
437       END IF
438 *
439       RETURN
440 *
441 *     End of ZGBTRF
442 *
443       END