dgbtrf.f (flens/lapack/interface/ref

  1       SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )

  2 *

  3 *  -- LAPACK routine (version 3.2) --

  4 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --

  5 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--

  6 *     November 2006

  7 *

  8 *     .. Scalar Arguments ..

  9       INTEGER            INFO, KL, KU, LDAB, M, N

 10 *     ..

 11 *     .. Array Arguments ..

 12       INTEGER            IPIV( * )

 13       DOUBLE PRECISION   AB( LDAB, * )

 14 *     ..

 15 *

 16 *  Purpose

 17 *  =======

 18 *

 19 *  DGBTRF computes an LU factorization of a real m-by-n band matrix A

 20 *  using partial pivoting with row interchanges.

 21 *

 22 *  This is the blocked version of the algorithm, calling Level 3 BLAS.

 23 *

 24 *  Arguments

 25 *  =========

 26 *

 27 *  M       (input) INTEGER

 28 *          The number of rows of the matrix A.  M >= 0.

 29 *

 30 *  N       (input) INTEGER

 31 *          The number of columns of the matrix A.  N >= 0.

 32 *

 33 *  KL      (input) INTEGER

 34 *          The number of subdiagonals within the band of A.  KL >= 0.

 35 *

 36 *  KU      (input) INTEGER

 37 *          The number of superdiagonals within the band of A.  KU >= 0.

 38 *

 39 *  AB      (input/output) DOUBLE PRECISION array, dimension (LDAB,N)

 40 *          On entry, the matrix A in band storage, in rows KL+1 to

 41 *          2*KL+KU+1; rows 1 to KL of the array need not be set.

 42 *          The j-th column of A is stored in the j-th column of the

 43 *          array AB as follows:

 44 *          AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)

 45 *

 46 *          On exit, details of the factorization: U is stored as an

 47 *          upper triangular band matrix with KL+KU superdiagonals in

 48 *          rows 1 to KL+KU+1, and the multipliers used during the

 49 *          factorization are stored in rows KL+KU+2 to 2*KL+KU+1.

 50 *          See below for further details.

 51 *

 52 *  LDAB    (input) INTEGER

 53 *          The leading dimension of the array AB.  LDAB >= 2*KL+KU+1.

 54 *

 55 *  IPIV    (output) INTEGER array, dimension (min(M,N))

 56 *          The pivot indices; for 1 <= i <= min(M,N), row i of the

 57 *          matrix was interchanged with row IPIV(i).

 58 *

 59 *  INFO    (output) INTEGER

 60 *          = 0: successful exit

 61 *          < 0: if INFO = -i, the i-th argument had an illegal value

 62 *          > 0: if INFO = +i, U(i,i) is exactly zero. The factorization

 63 *               has been completed, but the factor U is exactly

 64 *               singular, and division by zero will occur if it is used

 65 *               to solve a system of equations.

 66 *

 67 *  Further Details

 68 *  ===============

 69 *

 70 *  The band storage scheme is illustrated by the following example, when

 71 *  M = N = 6, KL = 2, KU = 1:

 72 *

 73 *  On entry:                       On exit:

 74 *

 75 *      *    *    *    +    +    +       *    *    *   u14  u25  u36

 76 *      *    *    +    +    +    +       *    *   u13  u24  u35  u46

 77 *      *   a12  a23  a34  a45  a56      *   u12  u23  u34  u45  u56

 78 *     a11  a22  a33  a44  a55  a66     u11  u22  u33  u44  u55  u66

 79 *     a21  a32  a43  a54  a65   *      m21  m32  m43  m54  m65   *

 80 *     a31  a42  a53  a64   *    *      m31  m42  m53  m64   *    *

 81 *

 82 *  Array elements marked * are not used by the routine; elements marked

 83 *  + need not be set on entry, but are required by the routine to store

 84 *  elements of U because of fill-in resulting from the row interchanges.

 85 *

 86 *  =====================================================================

 87 *

 88 *     .. Parameters ..

 89       DOUBLE PRECISION   ONE, ZERO

 90       PARAMETER          ( ONE = 1.0D+0, ZERO = 0.0D+0 )

 91       INTEGER            NBMAX, LDWORK

 92       PARAMETER          ( NBMAX = 64, LDWORK = NBMAX+1 )

 93 *     ..

 94 *     .. Local Scalars ..

 95       INTEGER            I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,

 96      $                   JU, K2, KM, KV, NB, NW

 97       DOUBLE PRECISION   TEMP

 98 *     ..

 99 *     .. Local Arrays ..

100       DOUBLE PRECISION   WORK13( LDWORK, NBMAX ),

101      $                   WORK31( LDWORK, NBMAX )

102 *     ..

103 *     .. External Functions ..

104       INTEGER            IDAMAX, ILAENV

105       EXTERNAL           IDAMAX, ILAENV

106 *     ..

107 *     .. External Subroutines ..

108       EXTERNAL           DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL,

109      $                   DSWAP, DTRSM, XERBLA

110 *     ..

111 *     .. Intrinsic Functions ..

112       INTRINSIC          MAX, MIN

113 *     ..

114 *     .. Executable Statements ..

115 *

116 *     KV is the number of superdiagonals in the factor U, allowing for

117 *     fill-in

118 *

119       KV = KU + KL

120 *

121 *     Test the input parameters.

122 *

123       INFO = 0

124       IF( M.LT.0 ) THEN

125          INFO = -1

126       ELSE IF( N.LT.0 ) THEN

127          INFO = -2

128       ELSE IF( KL.LT.0 ) THEN

129          INFO = -3

130       ELSE IF( KU.LT.0 ) THEN

131          INFO = -4

132       ELSE IF( LDAB.LT.KL+KV+1 ) THEN

133          INFO = -6

134       END IF

135       IF( INFO.NE.0 ) THEN

136          CALL XERBLA( 'DGBTRF', -INFO )

137          RETURN

138       END IF

139 *

140 *     Quick return if possible

141 *

142       IF( M.EQ.0 .OR. N.EQ.0 )

143      $   RETURN

144 *

145 *     Determine the block size for this environment

146 *

147       NB = ILAENV( 1, 'DGBTRF', ' ', M, N, KL, KU )

148 *

149 *     The block size must not exceed the limit set by the size of the

150 *     local arrays WORK13 and WORK31.

151 *

152       NB = MIN( NB, NBMAX )

153 *

154       IF( NB.LE.1 .OR. NB.GT.KL ) THEN

155 *

156 *        Use unblocked code

157 *

158          CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )

159       ELSE

160 *

161 *        Use blocked code

162 *

163 *        Zero the superdiagonal elements of the work array WORK13

164 *

165          DO 20 J = 1, NB

166             DO 10 I = 1, J - 1

167                WORK13( I, J ) = ZERO

168    10       CONTINUE

169    20    CONTINUE

170 *

171 *        Zero the subdiagonal elements of the work array WORK31

172 *

173          DO 40 J = 1, NB

174             DO 30 I = J + 1, NB

175                WORK31( I, J ) = ZERO

176    30       CONTINUE

177    40    CONTINUE

178 *

179 *        Gaussian elimination with partial pivoting

180 *

181 *        Set fill-in elements in columns KU+2 to KV to zero

182 *

183          DO 60 J = KU + 2, MIN( KV, N )

184             DO 50 I = KV - J + 2, KL

185                AB( I, J ) = ZERO

186    50       CONTINUE

187    60    CONTINUE

188 *

189 *        JU is the index of the last column affected by the current

190 *        stage of the factorization

191 *

192          JU = 1

193 *

194          DO 180 J = 1, MIN( M, N ), NB

195             JB = MIN( NB, MIN( M, N )-J+1 )

196 *

197 *           The active part of the matrix is partitioned

198 *

199 *              A11   A12   A13

200 *              A21   A22   A23

201 *              A31   A32   A33

202 *

203 *           Here A11, A21 and A31 denote the current block of JB columns

204 *           which is about to be factorized. The number of rows in the

205 *           partitioning are JB, I2, I3 respectively, and the numbers

206 *           of columns are JB, J2, J3. The superdiagonal elements of A13

207 *           and the subdiagonal elements of A31 lie outside the band.

208 *

209             I2 = MIN( KL-JB, M-J-JB+1 )

210             I3 = MIN( JB, M-J-KL+1 )

211 *

212 *           J2 and J3 are computed after JU has been updated.

213 *

214 *           Factorize the current block of JB columns

215 *

216             DO 80 JJ = J, J + JB - 1

217 *

218 *              Set fill-in elements in column JJ+KV to zero

219 *

220                IF( JJ+KV.LE.N ) THEN

221                   DO 70 I = 1, KL

222                      AB( I, JJ+KV ) = ZERO

223    70             CONTINUE

224                END IF

225 *

226 *              Find pivot and test for singularity. KM is the number of

227 *              subdiagonal elements in the current column.

228 *

229                KM = MIN( KL, M-JJ )

230                JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 )

231                IPIV( JJ ) = JP + JJ - J

232                IF( AB( KV+JP, JJ ).NE.ZERO ) THEN

233                   JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )

234                   IF( JP.NE.1 ) THEN

235 *

236 *                    Apply interchange to columns J to J+JB-1

237 *

238                      IF( JP+JJ-1.LT.J+KL ) THEN

239 *

240                         CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,

241      $                              AB( KV+JP+JJ-J, J ), LDAB-1 )

242                      ELSE

243 *

244 *                       The interchange affects columns J to JJ-1 of A31

245 *                       which are stored in the work array WORK31

246 *

247                         CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,

248      $                              WORK31( JP+JJ-J-KL, 1 ), LDWORK )

249                         CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,

250      $                              AB( KV+JP, JJ ), LDAB-1 )

251                      END IF

252                   END IF

253 *

254 *                 Compute multipliers

255 *

256                   CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),

257      $                        1 )

258 *

259 *                 Update trailing submatrix within the band and within

260 *                 the current block. JM is the index of the last column

261 *                 which needs to be updated.

262 *

263                   JM = MIN( JU, J+JB-1 )

264                   IF( JM.GT.JJ )

265      $               CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,

266      $                          AB( KV, JJ+1 ), LDAB-1,

267      $                          AB( KV+1, JJ+1 ), LDAB-1 )

268                ELSE

269 *

270 *                 If pivot is zero, set INFO to the index of the pivot

271 *                 unless a zero pivot has already been found.

272 *

273                   IF( INFO.EQ.0 )

274      $               INFO = JJ

275                END IF

276 *

277 *              Copy current column of A31 into the work array WORK31

278 *

279                NW = MIN( JJ-J+1, I3 )

280                IF( NW.GT.0 )

281      $            CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,

282      $                        WORK31( 1, JJ-J+1 ), 1 )

283    80       CONTINUE

284             IF( J+JB.LE.N ) THEN

285 *

286 *              Apply the row interchanges to the other blocks.

287 *

288                J2 = MIN( JU-J+1, KV ) - JB

289                J3 = MAX( 0, JU-J-KV+1 )

290 *

291 *              Use DLASWP to apply the row interchanges to A12, A22, and

292 *              A32.

293 *

294                CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,

295      $                      IPIV( J ), 1 )

296 *

297 *              Adjust the pivot indices.

298 *

299                DO 90 I = J, J + JB - 1

300                   IPIV( I ) = IPIV( I ) + J - 1

301    90          CONTINUE

302 *

303 *              Apply the row interchanges to A13, A23, and A33

304 *              columnwise.

305 *

306                K2 = J - 1 + JB + J2

307                DO 110 I = 1, J3

308                   JJ = K2 + I

309                   DO 100 II = J + I - 1, J + JB - 1

310                      IP = IPIV( II )

311                      IF( IP.NE.II ) THEN

312                         TEMP = AB( KV+1+II-JJ, JJ )

313                         AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )

314                         AB( KV+1+IP-JJ, JJ ) = TEMP

315                      END IF

316   100             CONTINUE

317   110          CONTINUE

318 *

319 *              Update the relevant part of the trailing submatrix

320 *

321                IF( J2.GT.0 ) THEN

322 *

323 *                 Update A12

324 *

325                   CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',

326      $                        JB, J2, ONE, AB( KV+1, J ), LDAB-1,

327      $                        AB( KV+1-JB, J+JB ), LDAB-1 )

328 *

329                   IF( I2.GT.0 ) THEN

330 *

331 *                    Update A22

332 *

333                      CALL DGEMM( 'No transpose', 'No transpose', I2, J2,

334      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,

335      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,

336      $                           AB( KV+1, J+JB ), LDAB-1 )

337                   END IF

338 *

339                   IF( I3.GT.0 ) THEN

340 *

341 *                    Update A32

342 *

343                      CALL DGEMM( 'No transpose', 'No transpose', I3, J2,

344      $                           JB, -ONE, WORK31, LDWORK,

345      $                           AB( KV+1-JB, J+JB ), LDAB-1, ONE,

346      $                           AB( KV+KL+1-JB, J+JB ), LDAB-1 )

347                   END IF

348                END IF

349 *

350                IF( J3.GT.0 ) THEN

351 *

352 *                 Copy the lower triangle of A13 into the work array

353 *                 WORK13

354 *

355                   DO 130 JJ = 1, J3

356                      DO 120 II = JJ, JB

357                         WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )

358   120                CONTINUE

359   130             CONTINUE

360 *

361 *                 Update A13 in the work array

362 *

363                   CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',

364      $                        JB, J3, ONE, AB( KV+1, J ), LDAB-1,

365      $                        WORK13, LDWORK )

366 *

367                   IF( I2.GT.0 ) THEN

368 *

369 *                    Update A23

370 *

371                      CALL DGEMM( 'No transpose', 'No transpose', I2, J3,

372      $                           JB, -ONE, AB( KV+1+JB, J ), LDAB-1,

373      $                           WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),

374      $                           LDAB-1 )

375                   END IF

376 *

377                   IF( I3.GT.0 ) THEN

378 *

379 *                    Update A33

380 *

381                      CALL DGEMM( 'No transpose', 'No transpose', I3, J3,

382      $                           JB, -ONE, WORK31, LDWORK, WORK13,

383      $                           LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )

384                   END IF

385 *

386 *                 Copy the lower triangle of A13 back into place

387 *

388                   DO 150 JJ = 1, J3

389                      DO 140 II = JJ, JB

390                         AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )

391   140                CONTINUE

392   150             CONTINUE

393                END IF

394             ELSE

395 *

396 *              Adjust the pivot indices.

397 *

398                DO 160 I = J, J + JB - 1

399                   IPIV( I ) = IPIV( I ) + J - 1

400   160          CONTINUE

401             END IF

402 *

403 *           Partially undo the interchanges in the current block to

404 *           restore the upper triangular form of A31 and copy the upper

405 *           triangle of A31 back into place

406 *

407             DO 170 JJ = J + JB - 1, J, -1

408                JP = IPIV( JJ ) - JJ + 1

409                IF( JP.NE.1 ) THEN

410 *

411 *                 Apply interchange to columns J to JJ-1

412 *

413                   IF( JP+JJ-1.LT.J+KL ) THEN

414 *

415 *                    The interchange does not affect A31

416 *

417                      CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,

418      $                           AB( KV+JP+JJ-J, J ), LDAB-1 )

419                   ELSE

420 *

421 *                    The interchange does affect A31

422 *

423                      CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,

424      $                           WORK31( JP+JJ-J-KL, 1 ), LDWORK )

425                   END IF

426                END IF

427 *

428 *              Copy the current column of A31 back into place

429 *

430                NW = MIN( I3, JJ-J+1 )

431                IF( NW.GT.0 )

432      $            CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1,

433      $                        AB( KV+KL+1-JJ+J, JJ ), 1 )

434   170       CONTINUE

435   180    CONTINUE

436       END IF

437 *

438       RETURN

439 *

440 *     End of DGBTRF

441 *

442       END