laqr0.tcc (flens/lapack/eig/laqr0.tcc)

  1 /*

  2  *   Copyright (c) 2011, Michael Lehn

  3  *

  4  *   All rights reserved.

  5  *

  6  *   Redistribution and use in source and binary forms, with or without

  7  *   modification, are permitted provided that the following conditions

  8  *   are met:

  9  *

 10  *   1) Redistributions of source code must retain the above copyright

 11  *      notice, this list of conditions and the following disclaimer.

 12  *   2) Redistributions in binary form must reproduce the above copyright

 13  *      notice, this list of conditions and the following disclaimer in

 14  *      the documentation and/or other materials provided with the

 15  *      distribution.

 16  *   3) Neither the name of the FLENS development group nor the names of

 17  *      its contributors may be used to endorse or promote products derived

 18  *      from this software without specific prior written permission.

 19  *

 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 31  */

 32 

 33 /* Based on

 34  *

 35        SUBROUTINE DLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI,

 36       $                   ILOZ, IHIZ, Z, LDZ, WORK, LWORK, INFO )

 37  *

 38  *  -- LAPACK auxiliary routine (version 3.2) --

 39  *     Univ. of Tennessee, Univ. of California Berkeley,

 40  *     Univ. of Colorado Denver and NAG Ltd..

 41  *     November 2006

 42  */

 43 

 44 #ifndef FLENS_LAPACK_EIG_LAQR0_TCC

 45 #define FLENS_LAPACK_EIG_LAQR0_TCC 1

 46 

 47 #include <flens/blas/blas.h>

 48 #include <flens/lapack/lapack.h>

 49 

 50 namespace flens { namespace lapack {

 51 

 52 //== generic lapack implementation =============================================

 53 

 54 template <typename IndexType, typename MH>

 55 IndexType

 56 laqr0_generic_wsq(bool                  wantT,

 57                   bool                  wantZ,

 58                   IndexType             iLo,

 59                   IndexType             iHi,

 60                   const GeMatrix<MH>    &H)

 61 {

 62     using std::max;

 63     using std::min;

 64 

 65     typedef typename GeMatrix<MH>::ElementType  T;

 66 

 67     const IndexType nTiny   = 11;

 68     const IndexType n       = H.numRows();

 69 

 70     if ((n==0) || (n<=nTiny)) {

 71         return 1;

 72     }

 73 

 74     char job[3];

 75     job[0] = (wantT) ? 'S' : 'E';

 76     job[1] = (wantZ) ? 'V' : 'N';

 77     job[2] = 0;

 78 //  

 79 //  ==== NWR = recommended deflation window size.  At this

 80 //  .    point,  N .GT. NTINY = 11, so there is enough

 81 //  .    subdiagonal workspace for NWR.GE.2 as required.

 82 //  .    (In fact, there is enough subdiagonal space for

 83 //  .    NWR.GE.3.) ====

 84 //  

 85     IndexType nwr = ilaenv<T>(13, "LAQR0", job, n, iLo, iHi, -1);

 86     nwr = max(IndexType(2), nwr);

 87     nwr = min(min(IndexType(iHi-iLo+1), (n-1)/3), nwr);

 88 //

 89 //  ==== NSR = recommended number of simultaneous shifts.

 90 //  .    At this point N .GT. NTINY = 11, so there is at

 91 //  .    enough subdiagonal workspace for NSR to be even

 92 //  .    and greater than or equal to two as required. ====

 93 //

 94     IndexType nsr = ilaenv<T>(15, "LAQR0", job, n, iLo, iHi, -1);

 95     nsr = min(min(nsr, (n+6)/9), IndexType(iHi-iLo));

 96     nsr = max(IndexType(2), nsr-(nsr%2));

 97 //

 98 //  ==== Estimate optimal workspace ====

 99 //

100 //  ==== Workspace query call to DLAQR3 ====

101 //

102     IndexType lWorkOpt = laqr3_wsq(IndexType(iLo), IndexType(iHi), nwr+1, H);

103 //

104 //  ==== Optimal workspace = MAX(DLAQR5, DLAQR3) ====

105 //

106     return max(3*nsr/2, lWorkOpt);

107 }

108 

109 template <typename IndexType, typename MH, typename VWR, typename VWI,

110           typename MZ, typename VWORK>

111 IndexType

112 laqr0_generic(bool                  wantT,

113               bool                  wantZ,

114               IndexType             iLo,

115               IndexType             iHi,

116               GeMatrix<MH>          &H,

117               DenseVector<VWR>      &wr,

118               DenseVector<VWI>      &wi,

119               IndexType             iLoZ,

120               IndexType             iHiZ,

121               GeMatrix<MZ>          &Z,

122               DenseVector<VWORK>    &work)

123 {

124     using std::abs;

125     using std::max;

126     using std::min;

127     using std::swap;

128 

129     typedef typename GeMatrix<MH>::ElementType  T;

130 

131     const Underscore<IndexType>     _;

132 

133     const IndexType n       = H.numRows();

134 

135 //  ==== Matrices of order NTINY or smaller must be processed by

136 //  .    DLAHQR because of insufficient subdiagonal scratch space.

137 //  .    (This is a hard limit.) ====

138     const IndexType nTiny   = 11;

139 

140 //  ==== Exceptional deflation windows:  try to cure rare

141 //  .    slow convergence by varying the size of the

142 //  .    deflation window after KEXNW iterations. ====

143     const IndexType kexNw   = 5;

144 

145 //

146 //  ==== Exceptional shifts: try to cure rare slow convergence

147 //  .    with ad-hoc exceptional shifts every KEXSH iterations.

148 //  .    ====

149     const IndexType kexSh   = 6;

150 

151 //

152 //  ==== The constants WILK1 and WILK2 are used to form the

153 //  .    exceptional shifts. ====

154     const T         wilk1 = T(0.75),

155                     wilk2 = T(-0.4375);

156 

157     const T         Zero(0), One(1);

158 

159     IndexType info          = 0;

160     IndexType lWork, lWorkOpt;

161 

162     IndexType nDec          = -1;

163 

164 //

165 //  ==== Perform and apply a workspace query if necessary ====

166 //

167     if (work.length()==0) {

168         lWorkOpt = laqr0_generic_wsq(wantT, wantZ, iLo, iHi, H);

169         work.resize(lWorkOpt);

170     }

171 

172     lWork = work.length();

173 

174 //

175 //  ==== Quick return for N = 0: nothing to do. ====

176 //

177     if (n==0) {

178         work(1) = One;

179         return info;

180     }

181 

182     if (n<=nTiny) {

183 //

184 //      ==== Tiny matrices must use DLAHQR. ====

185 //

186         lWorkOpt = 1;

187         info = lahqr(wantT, wantZ, iLo, iHi, H, wr, wi, iLoZ, iHiZ, Z);

188     } else {

189 //

190 //      ==== Use small bulge multi-shift QR with aggressive early

191 //      .    deflation on larger-than-tiny matrices. ====

192 //

193 //      ==== Hope for the best. ====

194 //

195         info = 0;

196 //

197 //      ==== Set up job flags for ILAENV. ====

198 //

199         char job[3];

200         job[0] = (wantT) ? 'S' : 'E';

201         job[1] = (wantZ) ? 'V' : 'N';

202         job[2] = 0;

203 //

204 //      ==== NWR = recommended deflation window size.  At this

205 //      .    point,  N .GT. NTINY = 11, so there is enough

206 //      .    subdiagonal workspace for NWR.GE.2 as required.

207 //      .    (In fact, there is enough subdiagonal space for

208 //      .    NWR.GE.3.) ====

209 //

210         IndexType nwr = ilaenv<T>(13, "LAQR0", job, n, iLo, iHi, lWork);

211         nwr = max(IndexType(2), nwr);

212         nwr = min(min(IndexType(iHi-iLo+1), (n-1)/3), nwr);

213 //

214 //      ==== NSR = recommended number of simultaneous shifts.

215 //      .    At this point N .GT. NTINY = 11, so there is at

216 //      .    enough subdiagonal workspace for NSR to be even

217 //      .    and greater than or equal to two as required. ====

218 //

219         IndexType nsr = ilaenv<T>(15, "LAQR0", job, n, iLo, iHi, lWork);

220         nsr = min(min(nsr, (n+6)/9), IndexType(iHi-iLo));

221         nsr = max(IndexType(2), nsr - (nsr%2));

222 //

223 //      ==== Estimate optimal workspace ====

224 //

225 //      ==== Workspace query call to DLAQR3 ====

226 //

227         lWorkOpt = laqr3_wsq(IndexType(iLo), IndexType(iHi), nwr+1, H);

228 //

229 //      ==== Optimal workspace = MAX(DLAQR5, DLAQR3) ====

230 //

231         lWorkOpt = max(3*nsr/2, lWorkOpt);

232 //

233 //      ==== DLAHQR/DLAQR0 crossover point ====

234 //

235         IndexType nMin = ilaenv<T>(12, "LAQR0", job, n, iLo, iHi, lWork);

236         nMin = max(nTiny, nMin);

237 //

238 //      ==== Nibble crossover point ====

239 //

240         IndexType nibble = ilaenv<T>(14, "LAQR0", job, n, iLo, iHi, lWork);

241         nibble = max(IndexType(0), nibble);

242 //

243 //      ==== Accumulate reflections during ttswp?  Use block

244 //      .    2-by-2 structure during matrix-matrix multiply? ====

245 //

246         IndexType kacc22 = ilaenv<T>(16, "LAQR0", job, n, iLo, iHi, lWork);

247         kacc22 = max(IndexType(0), kacc22);

248         kacc22 = min(IndexType(2), kacc22);

249 //

250 //      ==== NWMAX = the largest possible deflation window for

251 //      .    which there is sufficient workspace. ====

252 //

253         IndexType nwMax = min((n-1)/3, lWork/2);

254         IndexType nw = nwMax;

255 //

256 //      ==== NSMAX = the Largest number of simultaneous shifts

257 //      .    for which there is sufficient workspace. ====

258 //

259         IndexType nsMax = min((n+6 )/9, 2*lWork/3);

260         nsMax = nsMax - (nsMax%2);

261 //

262 //      ==== NDFL: an iteration count restarted at deflation. ====

263 //

264         IndexType nDfl = 1;

265 //

266 //      ==== ITMAX = iteration limit ====

267 //

268         IndexType itMax = max(IndexType(30), 2*kexSh)

269                         * max(IndexType(10), IndexType(iHi-iLo+1));

270 //

271 //      ==== Last row and column in the active block ====

272 //

273         IndexType kBot = iHi;

274 //

275 //      ==== Main Loop ====

276 //

277         IndexType it;

278         for (it=1; it<=itMax; ++it) {

279 //

280 //          ==== Done when KBOT falls below ILO ====

281 //

282             if (kBot<iLo) {

283                 break;

284             }

285 //

286 //          ==== Locate active block ====

287 //

288             IndexType k;

289             for (k=kBot; k>=iLo+1; --k) {

290                 if (H(k,k-1)==Zero) {

291                     break;

292                 }

293             }

294             ASSERT(k==iLo || H(k,k-1)==Zero);

295             const IndexType kTop = k;

296 // 

297 //          ==== Select deflation window size:

298 //          .    Typical Case:

299 //          .      If possible and advisable, nibble the entire

300 //          .      active block.  If not, use size MIN(NWR,NWMAX)

301 //          .      or MIN(NWR+1,NWMAX) depending upon which has

302 //          .      the smaller corresponding subdiagonal entry

303 //          .      (a heuristic).

304 //          .

305 //          .    Exceptional Case:

306 //          .      If there have been no deflations in KEXNW or

307 //          .      more iterations, then vary the deflation window

308 //          .      size.   At first, because, larger windows are,

309 //          .      in general, more powerful than smaller ones,

310 //          .      rapidly increase the window to the maximum possible.

311 //          .      Then, gradually reduce the window size. ====

312 // 

313             IndexType nh = kBot - kTop + 1;

314             IndexType nwUpBd = min(nh, nwMax);

315             if (nDfl<kexNw) {

316                 nw = min(nwUpBd, nwr);

317             } else {

318                 nw = min(nwUpBd, 2*nw);

319             }

320             if (nw<nwMax) {

321                 if (nw>=nh-1) {

322                     nw = nh;

323                 } else {

324                     const IndexType kwTop = kBot - nw + 1;

325                     if (abs(H(kwTop,kwTop-1))>abs(H(kwTop-1, kwTop-2))) {

326                         ++nw;

327                     }

328                 }

329             }

330             if (nDfl<kexNw) {

331                 nDec = -1;

332             } else if (nDec>=0 || nw>=nwUpBd) {

333                 ++nDec;

334                 if (nw-nDec<2) {

335                     nDec = 0;

336                 }

337                 nw -= nDec;

338             }

339 // 

340 //          ==== Aggressive early deflation:

341 //          .    split workspace under the subdiagonal into

342 //          .      - an nw-by-nw work array V in the lower

343 //          .        left-hand-corner,

344 //          .      - an NW-by-at-least-NW-but-more-is-better

345 //          .        (NW-by-NHO) horizontal work array along

346 //          .        the bottom edge,

347 //          .      - an at-least-NW-but-more-is-better (NHV-by-NW)

348 //          .        vertical work array along the left-hand-edge.

349 //          .        ====

350 // 

351             auto _V     = H(_(n-nw+1,    n), _(   1,     nw));

352             auto _T     = H(_(n-nw+1,    n), _(nw+1, n-nw-1));

353             auto _WV    = H(_(  nw+2, n-nw), _(   1,     nw));

354 //

355 //          ==== Aggressive early deflation ====

356 //

357             IndexType   ls, ld;

358 

359             laqr3(wantT, wantZ, kTop, kBot, nw, H,

360                   IndexType(iLoZ), IndexType(iHiZ), Z,

361                   ls, ld, wr(_(1,kBot)), wi(_(1,kBot)),

362                   _V, _T, _WV, work);

363 //

364 //          ==== Adjust KBOT accounting for new deflations. ====

365 //

366             kBot -= ld;

367 //

368 //          ==== KS points to the shifts. ====

369 //

370             IndexType ks = kBot - ls + 1;

371 //

372 //          ==== Skip an expensive QR sweep if there is a (partly

373 //          .    heuristic) reason to expect that many eigenvalues

374 //          .    will deflate without it.  Here, the QR sweep is

375 //          .    skipped if many eigenvalues have just been deflated

376 //          .    or if the remaining active block is small.

377 //

378             if ((ld==0)

379              || ((100*ld<=nw*nibble) && (kBot-kTop+1>min(nMin, nwMax))))

380             {

381 //

382 //              ==== NS = nominal number of simultaneous shifts.

383 //              .    This may be lowered (slightly) if DLAQR3

384 //              .    did not provide that many shifts. ====

385 //

386                 IndexType ns = min(min(nsMax, nsr),

387                                    max(IndexType(2), kBot-kTop));

388                 ns = ns - (ns % 2);

389 //

390 //              ==== If there have been no deflations

391 //              .    in a multiple of KEXSH iterations,

392 //              .    then try exceptional shifts.

393 //              .    Otherwise use shifts provided by

394 //              .    DLAQR3 above or from the eigenvalues

395 //              .    of a trailing principal submatrix. ====

396 //

397                 if (nDfl%kexSh==0) {

398                     ks = kBot - ns + 1;

399                     for (IndexType i=kBot; i>=max(ks+1,kTop+2); i-=2) {

400                         const T ss = abs(H(i,i-1)) + abs(H(i-1,i-2));

401                         T aa = wilk1*ss + H(i,i);

402                         T bb = ss;

403                         T cc = wilk2*ss;

404                         T dd = aa;

405                         T cs, sn;

406                         lanv2(aa, bb, cc, dd,

407                               wr(i-1), wi(i-1), wr(i), wi(i),

408                               cs, sn);

409                     }

410                     if (ks==kTop) {

411                         wr(ks+1)    = H(ks+1, ks+1);

412                         wi(ks+1)    = Zero;

413                         wr(ks)      = wr(ks+1);

414                         wi(ks)      = wi(ks+1);

415                     }

416                 } else {

417 //

418 //                  ==== Got NS/2 or fewer shifts? Use DLAQR4 or

419 //                  .    DLAHQR on a trailing principal submatrix to

420 //                  .    get more. (Since NS.LE.NSMAX.LE.(N+6)/9,

421 //                  .    there is enough space below the subdiagonal

422 //                  .    to fit an NS-by-NS scratch array.) ====

423 //

424                     if (kBot-ks+1<=ns/2) {

425                         ks = kBot - ns +1;

426                         H(_(ks,kBot),_(1,ns)) = H(_(ks,kBot),_(ks,kBot));

427                         if (ns>nMin) {

428                             // TODO: avoid the need for ZDummy

429                             typename GeMatrix<MZ>::NoView ZDummy;

430                             ks += laqr4(false, false,

431                                         IndexType(1), ns,

432                                         H(_(ks,kBot),_(1,ns)),

433                                         wr(_(ks,kBot)), wi(_(ks,kBot)),

434                                         IndexType(1), IndexType(1),

435                                         ZDummy, work);

436                         } else {

437                             // TODO: avoid the need for ZDummy

438                             typename GeMatrix<MZ>::NoView ZDummy;

439                             ks += lahqr(false, false,

440                                         IndexType(1), ns,

441                                         H(_(ks,kBot),_(1,ns)),

442                                         wr(_(ks,kBot)), wi(_(ks,kBot)),

443                                         IndexType(1), IndexType(1),

444                                         ZDummy);

445                         }

446 //

447 //                      ==== In case of a rare QR failure use

448 //                      .    eigenvalues of the trailing 2-by-2

449 //                      .    principal submatrix.  ====

450 //

451                         if (ks>=kBot) {

452                             T aa = H(kBot-1,kBot-1);

453                             T cc = H(kBot,  kBot-1);

454                             T bb = H(kBot-1,kBot);

455                             T dd = H(kBot,  kBot);

456                             T cs, sn;

457                             lanv2(aa, bb, cc, dd,

458                                   wr(kBot-1), wi(kBot-1), wr(kBot), wi(kBot),

459                                   cs, sn);

460                             ks = kBot - 1;

461                         }

462                     }

463 

464                     if (kBot-ks+1>ns) {

465 //

466 //                      ==== Sort the shifts (Helps a little)

467 //                      .    Bubble sort keeps complex conjugate

468 //                      .    pairs together. ====

469 //

470                         bool sorted = false;

471                         for (IndexType k=kBot; k>=ks+1; --k) {

472                             if (sorted) {

473                                 break;

474                             }

475                             sorted = true;

476                             for (IndexType i=ks; i<=k-1; ++i) {

477                                 if (abs(wr(i))+abs(wi(i))

478                                     < abs(wr(i+1))+abs(wi(i+1)))

479                                 {

480                                     sorted = false;

481                                     swap(wr(i), wr(i+1));

482                                     swap(wi(i), wi(i+1));

483                                 }

484                             }

485                         }

486                     }

487 //

488 //                  ==== Shuffle shifts into pairs of real shifts

489 //                  .    and pairs of complex conjugate shifts

490 //                  .    assuming complex conjugate shifts are

491 //                  .    already adjacent to one another. (Yes,

492 //                  .    they are.)  ====

493 //

494                     for (IndexType i=kBot; i>=ks+2; i-=2) {

495                         if (wi(i)!=-wi(i-1)) {

496                             T tmp  = wr(i);

497                             wr(i)   = wr(i-1);

498                             wr(i-1) = wr(i-2);

499                             wr(i-2) = tmp;

500 

501                             tmp     = wi(i);

502                             wi(i)   = wi(i-1);

503                             wi(i-1) = wi(i-2);

504                             wi(i-2) = tmp;

505                         }

506                     }

507                 }

508 //

509 //              ==== If there are only two shifts and both are

510 //              .    real, then use only one.  ====

511 //

512                 if (kBot-ks+1==2) {

513                     if (wi(kBot)==0) {

514                         const T _H = H(kBot,kBot);

515                         if (abs(wr(kBot)-_H) < abs(wr(kBot-1)-_H)) {

516                             wr(kBot-1) = wr(kBot);

517                         } else {

518                             wr(kBot) = wr(kBot-1);

519                         }

520                     }

521                 }

522 //

523 //              ==== Use up to NS of the the smallest magnatiude

524 //              .    shifts.  If there aren't NS shifts available,

525 //              .    then use them all, possibly dropping one to

526 //              .    make the number of shifts even. ====

527 //

528                 ns = min(ns, kBot-ks+1);

529                 ns = ns - (ns%2);

530                 ks = kBot - ns + 1;

531 //

532 //              ==== Small-bulge multi-shift QR sweep:

533 //              .    split workspace under the subdiagonal into

534 //              .    - a KDU-by-KDU work array U in the lower

535 //              .      left-hand-corner,

536 //              .    - a KDU-by-at-least-KDU-but-more-is-better

537 //              .      (KDU-by-NHo) horizontal work array WH along

538 //              .      the bottom edge,

539 //              .    - and an at-least-KDU-but-more-is-better-by-KDU

540 //              .      (NVE-by-KDU) vertical work WV arrow along

541 //              .      the left-hand-edge. ====

542 //

543                 IndexType kdu   = 3*ns - 3;

544                 IndexType ku    = n - kdu + 1;

545                 IndexType kmv   = kdu + 4;

546                 IndexType nho   = (n-kdu+1-4) - (kdu+1) + 1;

547 

548                 typedef typename GeMatrix<MH>::View GeMatrixView;

549                 GeMatrixView _V(IndexType(3), ns/2, work(_(1,3*ns/2)));

550                 auto _U     = H(_( ku,    n), _(    1,    kdu));

551                 auto _WV    = H(_(kmv,n-kdu), _(    1,    kdu));

552                 auto _WH    = H(_( ku,    n), _(kdu+1,kdu+nho));

553 //

554 //              ==== Small-bulge multi-shift QR sweep ====

555 //

556                 laqr5(wantT, wantZ, kacc22, kTop, kBot, ns,

557                       wr(_(ks,kBot)), wi(_(ks,kBot)), H,

558                       IndexType(iLoZ), IndexType(iHiZ), Z,

559                       _V, _U, _WV, _WH);

560             }

561 //

562 //          ==== Note progress (or the lack of it). ====

563 //

564             if (ld>0) {

565                 nDfl = 1;

566             } else {

567                 ++nDfl;

568             }

569 //

570 //          ==== End of main loop ====

571 //

572         }

573 //

574 //      ==== Iteration limit exceeded.  Set INFO to show where

575 //      .    the problem occurred and exit. ====

576 //

577         if (it>itMax) {

578             info = kBot;

579         }

580     }

581 

582     work(1) = lWorkOpt;

583     return info;

584 }

585 

586 //== interface for native lapack ===============================================

587 

588 #ifdef CHECK_CXXLAPACK

589 

590 template <typename IndexType, typename MH>

591 IndexType

592 laqr0_native_wsq(bool                  wantT,

593                  bool                  wantZ,

594                  IndexType             iLo,

595                  IndexType             iHi,

596                  const GeMatrix<MH>    &H)

597 {

598     typedef typename GeMatrix<MH>::ElementType  T;

599 

600     const LOGICAL   WANTT   = wantT;

601     const LOGICAL   WANTZ   = wantZ;

602     const INTEGER   N       = H.numRows();

603     const INTEGER   ILO     = iLo;

604     const INTEGER   IHI     = iHi;

605     const INTEGER   LDH     = H.leadingDimension();

606     const INTEGER   ILOZ    = 0;

607     const INTEGER   IHIZ    = 0;

608     const INTEGER   LDZ     = 0;

609     T               WORK;

610     T               DUMMY;

611     const INTEGER   LWORK   = -1;

612     INTEGER         INFO;

613 

614     if (IsSame<T,DOUBLE>::value) {

615         LAPACK_IMPL(dlaqr0)(&WANTT,

616                             &WANTZ,

617                             &N,

618                             &ILO,

619                             &IHI,

620                             &DUMMY,

621                             &LDH,

622                             &DUMMY,

623                             &DUMMY,

624                             &ILOZ,

625                             &IHIZ,

626                             &DUMMY,

627                             &LDZ,

628                             &WORK,

629                             &LWORK,

630                             &INFO);

631     } else {

632         ASSERT(0);

633     }

634     return WORK;

635 }

636 

637 template <typename IndexType, typename MH, typename VWR, typename VWI,

638           typename MZ, typename VWORK>

639 IndexType

640 laqr0_native(bool                  wantT,

641              bool                  wantZ,

642              IndexType             iLo,

643              IndexType             iHi,

644              GeMatrix<MH>          &H,

645              DenseVector<VWR>      &wr,

646              DenseVector<VWI>      &wi,

647              IndexType             iLoZ,

648              IndexType             iHiZ,

649              GeMatrix<MZ>          &Z,

650              DenseVector<VWORK>    &work)

651 {

652     typedef typename GeMatrix<MH>::ElementType  T;

653 

654     const LOGICAL   WANTT   = wantT;

655     const LOGICAL   WANTZ   = wantZ;

656     const INTEGER   N       = H.numRows();

657     const INTEGER   ILO     = iLo;

658     const INTEGER   IHI     = iHi;

659     const INTEGER   LDH     = H.leadingDimension();

660     const INTEGER   ILOZ    = iLoZ;

661     const INTEGER   IHIZ    = iHiZ;

662     const INTEGER   LDZ     = Z.leadingDimension();

663     const INTEGER   LWORK   = work.length();

664     INTEGER         INFO;

665 

666     if (IsSame<T,DOUBLE>::value) {

667         LAPACK_IMPL(dlaqr0)(&WANTT,

668                             &WANTZ,

669                             &N,

670                             &ILO,

671                             &IHI,

672                             H.data(),

673                             &LDH,

674                             wr.data(),

675                             wi.data(),

676                             &ILOZ,

677                             &IHIZ,

678                             Z.data(),

679                             &LDZ,

680                             work.data(),

681                             &LWORK,

682                             &INFO);

683     } else {

684         ASSERT(0);

685     }

686     ASSERT(INFO>=0);

687     return INFO;

688 }

689 

690 #endif // CHECK_CXXLAPACK

691 

692 //== public interface ==========================================================

693 template <typename IndexType, typename MH>

694 IndexType

695 laqr0_wsq(bool                  wantT,

696           bool                  wantZ,

697           IndexType             iLo,

698           IndexType             iHi,

699           const GeMatrix<MH>    &H)

700 {

701     using std::max;

702 //

703 //  Test the input parameters

704 //

705 #   ifndef NDEBUG

706     ASSERT(H.firstRow()==1);

707     ASSERT(H.firstCol()==1);

708     ASSERT(H.numRows()==H.numCols());

709 

710     const IndexType n = H.numRows();

711 

712     if (n>0) {

713         ASSERT(1<=iLo);

714         ASSERT(iLo<=iHi);

715         ASSERT(iHi<=n);

716     } else {

717         ASSERT(iLo==1);

718         ASSERT(iHi==0);

719     }

720 #   endif

721 

722 //

723 //  Call implementation

724 //

725     IndexType info = laqr0_generic_wsq(wantT, wantZ, iLo, iHi, H);

726 

727 #   ifdef CHECK_CXXLAPACK

728 //

729 //  Compare results

730 //

731     IndexType _info = laqr0_native_wsq(wantT, wantZ, iLo, iHi, H);

732 

733     if (info!=_info) {

734         std::cerr << "CXXLAPACK:  info = " << info << std::endl;

735         std::cerr << "F77LAPACK: _info = " << _info << std::endl;

736         ASSERT(0);

737     }

738 #   endif

739     return info;

740 }

741 

742 template <typename IndexType, typename MH, typename VWR, typename VWI,

743           typename MZ, typename VWORK>

744 IndexType

745 laqr0(bool                  wantT,

746       bool                  wantZ,

747       IndexType             iLo,

748       IndexType             iHi,

749       GeMatrix<MH>          &H,

750       DenseVector<VWR>      &wr,

751       DenseVector<VWI>      &wi,

752       IndexType             iLoZ,

753       IndexType             iHiZ,

754       GeMatrix<MZ>          &Z,

755       DenseVector<VWORK>    &work)

756 {

757     LAPACK_DEBUG_OUT("laqr0");

758 

759     using std::max;

760 //

761 //  Test the input parameters

762 //

763 #   ifndef NDEBUG

764     ASSERT(H.firstRow()==1);

765     ASSERT(H.firstCol()==1);

766     ASSERT(H.numRows()==H.numCols());

767 

768     const IndexType n = H.numRows();

769 

770     if (n>0) {

771         ASSERT(1<=iLo);

772         ASSERT(iLo<=iHi);

773         ASSERT(iHi<=n);

774     } else {

775         ASSERT(iLo==1);

776         ASSERT(iHi==0);

777     }

778 

779     ASSERT(wr.firstIndex()==1);

780     ASSERT(wr.length()>=iHi);

781 

782     ASSERT(wi.firstIndex()==1);

783     ASSERT(wi.length()>=iHi);

784 

785     ASSERT(1<=iLoZ);

786     ASSERT(iLoZ<=iLo);

787     ASSERT(iHi<=iHiZ);

788     ASSERT(iHiZ<=n);

789 

790     ASSERT(Z.firstRow()==1);

791     ASSERT(Z.firstCol()==1);

792     ASSERT(Z.numRows()>=iHi);

793     ASSERT(Z.numCols()>=iHi);

794 

795     ASSERT((work.length()==0) || (work.length()>=n));

796 #   endif

797 

798 //

799 //  Make copies of output arguments

800 //

801 #   ifdef CHECK_CXXLAPACK

802     typename GeMatrix<MH>::NoView          H_org       = H;

803     typename DenseVector<VWR>::NoView      wr_org      = wr;

804     typename DenseVector<VWI>::NoView      wi_org      = wi;

805     typename GeMatrix<MZ>::NoView          Z_org       = Z;

806     typename DenseVector<VWORK>::NoView    work_org    = work;

807 #   endif

808 

809 //

810 //  Call implementation

811 //

812     IndexType info = laqr0_generic(wantT, wantZ, iLo, iHi, H, wr, wi,

813                                    iLoZ, iHiZ, Z, work);

814 

815 #   ifdef CHECK_CXXLAPACK

816 //

817 //  Make copies of results computed by the generic implementation

818 //

819     typename GeMatrix<MH>::NoView          H_generic       = H;

820     typename DenseVector<VWR>::NoView      wr_generic      = wr;

821     typename DenseVector<VWI>::NoView      wi_generic      = wi;

822     typename GeMatrix<MZ>::NoView          Z_generic       = Z;

823     typename DenseVector<VWORK>::NoView    work_generic    = work;

824 //

825 //  restore output arguments

826 //

827     H = H_org;

828     wr = wr_org;

829     wi = wi_org;

830     Z = Z_org;

831     work = work_org;

832 //

833 //  Compare generic results with results from the native implementation

834 //

835     IndexType _info = laqr0_native(wantT, wantZ, iLo, iHi, H, wr, wi,

836                                    iLoZ, iHiZ, Z, work);

837 

838     bool failed = false;

839     if (! isIdentical(H_generic, H, "H_generic", "H")) {

840         std::cerr << "CXXLAPACK: H_generic = " << H_generic << std::endl;

841         std::cerr << "F77LAPACK: H = " << H << std::endl;

842         failed = true;

843     }

844 

845     if (! isIdentical(wr_generic, wr, "wr_generic", "wr")) {

846         std::cerr << "CXXLAPACK: wr_generic = " << wr_generic << std::endl;

847         std::cerr << "F77LAPACK: wr = " << wr << std::endl;

848         failed = true;

849     }

850 

851     if (! isIdentical(wi_generic, wi, "wi_generic", "wi")) {

852         std::cerr << "CXXLAPACK: wi_generic = " << wi_generic << std::endl;

853         std::cerr << "F77LAPACK: wi = " << wi << std::endl;

854         failed = true;

855     }

856 

857     if (! isIdentical(Z_generic, Z, "Z_generic", "Z")) {

858         std::cerr << "CXXLAPACK: Z_generic = " << Z_generic << std::endl;

859         std::cerr << "F77LAPACK: Z = " << Z << std::endl;

860         failed = true;

861     }

862 

863     if (! isIdentical(info, _info, " info", "_info")) {

864         std::cerr << "CXXLAPACK:  info = " << info << std::endl;

865         std::cerr << "F77LAPACK: _info = " << _info << std::endl;

866         failed = true;

867     }

868 

869     if (! isIdentical(work_generic, work, "work_generic", "work")) {

870         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;

871         std::cerr << "F77LAPACK: work = " << work << std::endl;

872         failed = true;

873     }

874 

875     if (failed) {

876         std::cerr << "error in: laqr0.tcc" << std::endl;

877         ASSERT(0);

878     } else {

879         // std::cerr << "passed: laqr0.tcc" << std::endl;

880     }

881 #   endif

882 

883     return info;

884 }

885 

886 //-- forwarding ----------------------------------------------------------------

887 template <typename IndexType, typename MH>

888 IndexType

889 laqr0_wsq(bool              wantT,

890           bool              wantZ,

891           IndexType         iLo,

892           IndexType         iHi,

893           const MH          &&H)

894 {

895     CHECKPOINT_ENTER;

896     const IndexType info = laqr0_wsq(wantT, wantZ, iLo, iHi, H);

897     CHECKPOINT_LEAVE;

898 

899     return info;

900 }

901 

902 template <typename IndexType, typename MH, typename VWR, typename VWI,

903           typename MZ, typename VWORK>

904 IndexType

905 laqr0(bool                  wantT,

906       bool                  wantZ,

907       IndexType             iLo,

908       IndexType             iHi,

909       MH                    &&H,

910       VWR                   &&wr,

911       VWI                   &&wi,

912       IndexType             iLoZ,

913       IndexType             iHiZ,

914       MZ                    &&Z,

915       VWORK                 &&work)

916 {

917     CHECKPOINT_ENTER;

918     const IndexType info = laqr0(wantT, wantZ, iLo, iHi, H, wr, wi,

919                                  iLoZ, iHiZ, Z, work);

920     CHECKPOINT_LEAVE;

921 

922     return info;

923 }

924 

925 } } // namespace lapack, flens

926 

927 #endif // FLENS_LAPACK_EIG_LAQR0_TCC