lahqr.tcc (flens/lapack/eig/lahqr.tcc)

  1 /*

  2  *   Copyright (c) 2011, Michael Lehn

  3  *

  4  *   All rights reserved.

  5  *

  6  *   Redistribution and use in source and binary forms, with or without

  7  *   modification, are permitted provided that the following conditions

  8  *   are met:

  9  *

 10  *   1) Redistributions of source code must retain the above copyright

 11  *      notice, this list of conditions and the following disclaimer.

 12  *   2) Redistributions in binary form must reproduce the above copyright

 13  *      notice, this list of conditions and the following disclaimer in

 14  *      the documentation and/or other materials provided with the

 15  *      distribution.

 16  *   3) Neither the name of the FLENS development group nor the names of

 17  *      its contributors may be used to endorse or promote products derived

 18  *      from this software without specific prior written permission.

 19  *

 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 31  */

 32 

 33 /* Based on

 34       SUBROUTINE DLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI,

 35      $                   ILOZ, IHIZ, Z, LDZ, INFO )

 36  *

 37  *  -- LAPACK auxiliary routine (version 3.2) --

 38  *     Univ. of Tennessee, Univ. of California Berkeley,

 39  *     Univ. of Colorado Denver and NAG Ltd..

 40  *     November 2006

 41  */

 42 

 43 #ifndef FLENS_LAPACK_EIG_LAHQR_TCC

 44 #define FLENS_LAPACK_EIG_LAHQR_TCC 1

 45 

 46 #include <cmath>

 47 #include <flens/blas/blas.h>

 48 #include <flens/lapack/lapack.h>

 49 

 50 namespace flens { namespace lapack {

 51 

 52 //== generic lapack implementation =============================================

 53 

 54 template <typename IndexType, typename MH, typename VWR, typename VWI,

 55           typename MZ>

 56 IndexType

 57 lahqr_generic(bool                  wantT,

 58               bool                  wantZ,

 59               IndexType             iLo,

 60               IndexType             iHi,

 61               GeMatrix<MH>          &H,

 62               DenseVector<VWR>      &wr,

 63               DenseVector<VWI>      &wi,

 64               IndexType             iLoZ,

 65               IndexType             iHiZ,

 66               GeMatrix<MZ>          &Z)

 67 {

 68     using std::abs;

 69     using std::max;

 70     using std::min;

 71 

 72     typedef typename GeMatrix<MH>::ElementType  T;

 73 

 74     const Underscore<IndexType>     _;

 75     const T                         Zero(0), One(1), Two(2);

 76     const T                         Dat1 = T(3)/T(4),

 77                                     Dat2 = T(-0.4375);

 78     const IndexType                 itMax = 30;

 79     const IndexType                 n = H.numRows();

 80 

 81     typedef typename GeMatrix<MH>::VectorView   VectorView;

 82     T           vBuffer[3];

 83     VectorView  v = typename VectorView::Engine(3, vBuffer);

 84 

 85 //

 86 //  Quick return if possible

 87 //

 88     if (n==0) {

 89         return 0;

 90     }

 91     if (iLo==iHi) {

 92         wr(iLo) = H(iLo, iLo);

 93         wi(iLo) = Zero;

 94         return 0;

 95     }

 96 //

 97 //  ==== clear out the trash ====

 98 //

 99     for (IndexType j=iLo; j<=iHi-3; ++j) {

100         H(j+2, j) = Zero;

101         H(j+3, j) = Zero;

102     }

103     if (iLo<=iHi-2) {

104         H(iHi, iHi-2) = Zero;

105     }

106 

107     const IndexType nh = iHi - iLo + 1;

108 //

109 //  Set machine-dependent constants for the stopping criterion.

110 //

111     T safeMin = lamch<T>(SafeMin);

112     T safeMax = One / safeMin;

113     labad(safeMin, safeMax);

114     const T ulp = lamch<T>(Precision);

115     const T smallNum = safeMin*(T(nh)/ulp);

116 //

117 //  i1 and i2 are the indices of the first row and last column of H

118 //  to which transformations must be applied. If eigenvalues only are

119 //  being computed, i1 and i2 are set inside the main loop.

120 //

121     IndexType i1 = -1, i2 = -1;

122     if (wantT) {

123         i1 = 1;

124         i2 = n;

125     }

126 //

127 //  The main loop begins here. Variable i is the loop index and decreases from

128 //  iHi to iLo in steps of 1 or 2. Each iteration of the loop works

129 //  with the active submatrix in rows and columns l to i.

130 //  Eigenvalues i+1 to iHi have already converged. Either l = iLo or

131 //  H(l,l-1) is negligible so that the matrix splits.

132 //

133     IndexType i = iHi;

134     while (true) {

135         IndexType l = iLo;

136         if (i<iLo) {

137             break;

138         }

139 //

140 //      Perform QR iterations on rows and columns iLo to i until a

141 //      submatrix of order 1 or 2 splits off at the bottom because a

142 //      subdiagonal element has become negligible.

143 //

144         IndexType its;

145         for (its=0; its<=itMax; ++its) {

146 //

147 //          Look for a single small subdiagonal element.

148 //

149             IndexType k;

150             for (k=i; k>=l+1; --k) {

151                 if (abs(H(k,k-1))<=smallNum) {

152                     break;

153                 }

154                 T test = abs(H(k-1,k-1)) + abs(H(k,k));

155                 if (test==Zero) {

156                     if (k-2>=iLo) {

157                         test += abs(H(k-1,k-2));

158                     }

159                     if (k+1<=iHi) {

160                         test += abs(H(k+1,k));

161                     }

162                 }

163 //              ==== The following is a conservative small subdiagonal

164 //              .    deflation  criterion due to Ahues & Tisseur (LAWN 122,

165 //              .    1997). It has better mathematical foundation and

166 //              .    improves accuracy in some cases.  ====

167                 if (abs(H(k,k-1))<=ulp*test) {

168                     const T ab = max(abs(H(k, k-1)), abs(H(k-1, k)));

169                     const T ba = min(abs(H(k, k-1)), abs(H(k-1, k)));

170                     const T aa = max(abs(H(k,   k)), abs(H(k-1, k-1)-H(k, k)));

171                     const T bb = min(abs(H(k,   k)), abs(H(k-1, k-1)-H(k, k)));

172                     const T s = aa + ab;

173                     if (ba*(ab/s)<=max(smallNum, ulp*(bb*(aa/s)))) {

174                         break;

175                     }

176                 }

177             }

178             l = k;

179 

180             if (l>iLo) {

181 //

182 //              H(l,l-1) is negligible

183 //

184                 H(l, l-1) = Zero;

185             }

186 //

187 //          Exit from loop if a submatrix of order 1 or 2 has split off.

188 //

189             if (l>=i-1) {

190                 break;

191             }

192 //

193 //          Now the active submatrix is in rows and columns l to i. If

194 //          eigenvalues only are being computed, only the active submatrix

195 //          need be transformed.

196 //

197             if (!wantT) {

198                 i1 = l;

199                 i2 = i;

200             }

201 

202             T H11, H12, H21, H22;

203             T rt1r, rt2r, rt1i, rt2i;

204             if (its==10) {

205 //

206 //              Exceptional shift.

207 //

208                 const T s = abs(H(l+1,l)) + abs(H(l+2, l+1));

209                 H11 = Dat1*s + H(l,l);

210                 H12 = Dat2*s;

211                 H21 = s;

212                 H22 = H11;

213             } else if (its==20) {

214 //

215 //              Exceptional shift.

216 //

217                 const T s = abs(H(i,i-1)) + abs(H(i-1,i-2));

218                 H11 = Dat1*s + H(i,i);

219                 H12 = Dat2*s;

220                 H21 = s;

221                 H22 = H11;

222             } else {

223 //

224 //              Prepare to use Francis' double shift

225 //              (i.e. 2nd degree generalized Rayleigh quotient)

226 //

227                 H11 = H(i-1, i-1);

228                 H21 = H(  i, i-1);

229                 H12 = H(i-1,   i);

230                 H22 = H(  i,   i);

231             }

232 

233             const T s = abs(H11) + abs(H12) + abs(H21) + abs(H22);

234             if (s==Zero) {

235                 rt1r = Zero;

236                 rt1i = Zero;

237                 rt2r = Zero;

238                 rt2i = Zero;

239             } else {

240                 H11 /= s;

241                 H21 /= s;

242                 H12 /= s;

243                 H22 /= s;

244                 const T tr = (H11+H22) / Two;

245                 const T det = (H11-tr)*(H22-tr) - H12*H21;

246                 const T rtDisc = sqrt(abs(det));

247                 if (det>=Zero) {

248 //

249 //                  ==== complex conjugate shifts ====

250 //

251                     rt1r = tr*s;

252                     rt2r = rt1r;

253                     rt1i = rtDisc*s;

254                     rt2i = -rt1i;

255                 } else {

256 //

257 //                  ==== real shifts (use only one of them)  ====

258 //

259                     rt1r = tr + rtDisc;

260                     rt2r = tr - rtDisc;

261                     if (abs(rt1r-H22)<=abs(rt2r-H22)) {

262                         rt1r *= s;

263                         rt2r = rt1r;

264                     } else {

265                         rt2r *= s;

266                         rt1r = rt2r;

267                     }

268                     rt1i = Zero;

269                     rt2i = Zero;

270                 }

271             }

272 //

273 //          Look for two consecutive small subdiagonal elements.

274 //

275             IndexType m;

276             for (m=i-2; m>=l; --m) {

277 //              Determine the effect of starting the double-shift QR

278 //              iteration at row m, and see if this would make H(m,m-1)

279 //              negligible.  (The following uses scaling to avoid

280 //              overflows and most underflows.)

281 //

282                 T H21S = H(m+1,m);

283                 T s = abs(H(m,m)-rt2r) + abs(rt2i) + abs(H21S);

284 

285                 H21S = H(m+1,m)/s;

286                 v(1) = H21S*H(m,m+1)

287                      + (H(m,m)-rt1r)*((H(m,m)-rt2r)/s)

288                      - rt1i*(rt2i/s);

289                 v(2) = H21S*(H(m,m) + H(m+1,m+1)-rt1r-rt2r);

290                 v(3) = H21S*H(m+2,m+1);

291 

292                 s = abs(v(1)) + abs(v(2)) + abs(v(3));

293                 v(1) /= s;

294                 v(2) /= s;

295                 v(3) /= s;

296 

297                 if (m==l) {

298                     break;

299                 }

300 

301                 const T value1 = abs(H(m,m-1))*(abs(v(2))+abs(v(3)));

302                 const T value2 = ulp*abs(v(1))

303                                  *(abs(H(m-1,m-1))+abs(H(m,m))+abs(H(m+1,m+1)));

304                 if (value1<=value2) {

305                     break;

306                 }

307             }

308 //

309 //          Double-shift QR step

310 //

311             for (k=m; k<=i-1; ++k) {

312 //

313 //              The first iteration of this loop determines a reflection G

314 //              from the vector v and applies it from left and right to H,

315 //              thus creating a nonzero bulge below the subdiagonal.

316 //

317 //              Each subsequent iteration determines a reflection G to

318 //              restore the Hessenberg form in the (k-1)th column, and thus

319 //              chases the bulge one step toward the bottom of the active

320 //              submatrix. 'nr' is the order of G.

321 //

322                 T   t1, t2, t3, v2, v3;

323 

324                 IndexType nr = min(IndexType(3), i-k+1);

325                 if (k>m) {

326                     v(_(1,nr)) = H(_(k,k+nr-1),k-1);

327                 }

328                 larfg(nr, v(1), v(_(2,nr)), t1);

329                 if (k>m) {

330                     H(k,  k-1) = v(1);

331                     H(k+1,k-1) = Zero;

332                     if (k<i-1) {

333                         H(k+2,k-1) = Zero;

334                     }

335                 } else if (m>l) {

336 //                  ==== Use the following instead of

337 //                  .    H( K, K-1 ) = -H( K, K-1 ) to

338 //                  .    avoid a bug when v(2) and v(3)

339 //                  .    underflow. ====

340                     H(k, k-1) *= (One-t1);

341                 }

342                 v2 = v(2);

343                 t2 = t1*v2;

344                 if (nr==3) {

345                     v3 = v(3);

346                     t3 = t1*v3;

347 //

348 //                  Apply G from the left to transform the rows of the matrix

349 //                  in columns k to i2.

350 //

351                     for (IndexType j=k; j<=i2; ++j) {

352                         const T sum = H(k,j) + v2*H(k+1,j) + v3*H(k+2,j);

353                         H(k,   j) -= sum*t1;

354                         H(k+1, j) -= sum*t2;

355                         H(k+2, j) -= sum*t3;

356                     }

357 //

358 //                  Apply G from the right to transform the columns of the

359 //                  matrix in rows i1 to min(k+3,i).

360 //

361                     for (IndexType j=i1; j<=min(k+3,i); ++j) {

362                         const T sum = H(j, k) + v2*H(j,k+1) + v3*H(j,k+2);

363                         H(j, k  ) -= sum*t1;

364                         H(j, k+1) -= sum*t2;

365                         H(j, k+2) -= sum*t3;

366                     }

367 

368                     if (wantZ) {

369 //

370 //                      Accumulate transformations in the matrix Z

371 //

372                         for (IndexType j=iLoZ; j<=iHiZ; ++j) {

373                             const T sum = Z(j, k) + v2*Z(j, k+1) + v3*Z(j, k+2);

374                             Z(j, k  ) -= sum*t1;

375                             Z(j, k+1) -= sum*t2;

376                             Z(j, k+2) -= sum*t3;

377                         }

378                     }

379                 } else if (nr==2) {

380 //

381 //                  Apply G from the left to transform the rows of the matrix

382 //                  in columns K to I2.

383 //

384                     for (IndexType j=k; j<=i2; ++j) {

385                         const T sum = H(k, j) + v2*H(k+1, j);

386                         H(k,   j) -= sum*t1;

387                         H(k+1, j) -= sum*t2;

388                     }

389 //

390 //                  Apply G from the right to transform the columns of the

391 //                  matrix in rows i1 to min(k+3,i).

392 //

393                     for (IndexType j=i1; j<=i; ++j) {

394                         const T sum = H(j, k) + v2*H(j, k+1);

395                         H(j, k  ) -= sum*t1;

396                         H(j, k+1) -= sum*t2;

397                     }

398 

399                     if (wantZ) {

400 //

401 //                      Accumulate transformations in the matrix Z

402 //

403                         for (IndexType j=iLoZ; j<=iHiZ; ++j) {

404                             const T sum = Z(j, k) + v2*Z(j, k+1);

405                             Z(j, k  ) -= sum*t1;

406                             Z(j, k+1) -= sum*t2;

407                         }

408                     }

409                 }

410             }

411         }

412 //

413 //      Failure to converge in remaining number of iterations

414 //

415         if (its>itMax) {

416             return i;

417         }

418 

419         if (l==i) {

420 //

421 //          H(I,I-1) is negligible: one eigenvalue has converged.

422 //

423             wr(i) = H(i, i);

424             wi(i) = Zero;

425         } else if (l==i-1) {

426 //

427 //          H(I-1,I-2) is negligible: a pair of eigenvalues have converged.

428 //

429 //          Transform the 2-by-2 submatrix to standard Schur form,

430 //          and compute and store the eigenvalues.

431 //

432             T cs, sn;

433             lanv2(H(i-1,i-1), H(i-1,i), H(i,i-1), H(i,i),

434                   wr(i-1), wi(i-1), wr(i), wi(i),

435                   cs, sn);

436 

437             if (wantT) {

438 //

439 //              Apply the transformation to the rest of H.

440 //

441                 if (i2>i) {

442                     const auto cols = _(i+1,i2);

443                     blas::rot(H(i-1,cols), H(i,cols), cs, sn);

444                 }

445                 const auto rows = _(i1,i-2);

446                 blas::rot(H(rows, i-1), H(rows, i), cs, sn);

447             }

448             if (wantZ) {

449 //

450 //              Apply the transformation to Z.

451 //

452                 const auto rows = _(iLoZ, iHiZ);

453                 blas::rot(Z(rows, i-1), Z(rows, i), cs, sn);

454             }

455         }

456 //

457 //      return to start of the main loop with new value of I.

458 //

459         i = l - 1;

460     }

461     return 0;

462 }

463 

464 //== interface for native lapack ===============================================

465 

466 #ifdef CHECK_CXXLAPACK

467 

468 template <typename IndexType, typename MH, typename VWR, typename VWI,

469           typename MZ>

470 IndexType

471 lahqr_native(bool                   wantT,

472              bool                   wantZ,

473              IndexType              iLo,

474              IndexType              iHi,

475              GeMatrix<MH>           &H,

476              DenseVector<VWR>       &wr,

477              DenseVector<VWI>       &wi,

478              IndexType              iLoZ,

479              IndexType              iHiZ,

480              GeMatrix<MZ>           &Z)

481 {

482     typedef typename GeMatrix<MH>::ElementType  T;

483 

484     const LOGICAL    WANTT  = wantT;

485     const LOGICAL    WANTZ  = wantZ;

486     const INTEGER    N      = H.numRows();

487     const INTEGER    ILO    = iLo;

488     const INTEGER    IHI    = iHi;

489     const INTEGER    LDH    = H.leadingDimension();

490     const INTEGER    ILOZ   = iLoZ;

491     const INTEGER    IHIZ   = iHiZ;

492     const INTEGER    LDZ    = Z.leadingDimension();

493     INTEGER          INFO;

494 

495     if (IsSame<T,DOUBLE>::value) {

496         LAPACK_IMPL(dlahqr)(&WANTT,

497                             &WANTZ,

498                             &N,

499                             &ILO,

500                             &IHI,

501                             H.data(),

502                             &LDH,

503                             wr.data(),

504                             wi.data(),

505                             &ILOZ,

506                             &IHIZ,

507                             Z.data(),

508                             &LDZ,

509                             &INFO);

510     } else {

511         ASSERT(0);

512     }

513     return INFO;

514 }

515 

516 #endif // CHECK_CXXLAPACK

517 

518 //== public interface ==========================================================

519 

520 template <typename IndexType, typename MH, typename VWR, typename VWI,

521           typename MZ>

522 IndexType

523 lahqr(bool                  wantT,

524       bool                  wantZ,

525       IndexType             iLo,

526       IndexType             iHi,

527       GeMatrix<MH>          &H,

528       DenseVector<VWR>      &wr,

529       DenseVector<VWI>      &wi,

530       IndexType             iLoZ,

531       IndexType             iHiZ,

532       GeMatrix<MZ>          &Z)

533 {

534     LAPACK_DEBUG_OUT("lahqr");

535 

536 //

537 //  Test the input parameters

538 //

539     using std::max;

540 

541     ASSERT(H.firstRow()==1);

542     ASSERT(H.firstCol()==1);

543     ASSERT(H.numRows()==H.numCols());

544     ASSERT(wr.firstIndex()==1);

545     ASSERT(wr.length()==H.numRows());

546     ASSERT(wi.firstIndex()==1);

547     ASSERT(wi.length()==H.numRows());

548     ASSERT(wantZ || (Z.numRows()==H.numCols()));

549     ASSERT(wantZ || (Z.numCols()==H.numCols()));

550 

551     // 1 <= ILO <= max(1,IHI); IHI <= N.

552     ASSERT(1<=iLo);

553     ASSERT(iLo<=max(IndexType(1), iHi));

554     ASSERT(iHi<=H.numRows());

555 

556     // 1 <= ILOZ <= ILO; IHI <= IHIZ <= N.

557     ASSERT(1<=iLoZ);

558     ASSERT(iLoZ<=iLo);

559     ASSERT(iHi<=iHiZ);

560     ASSERT(iHiZ<=H.numRows());

561 

562 //

563 //  Make copies of output arguments

564 //

565 #   ifdef CHECK_CXXLAPACK

566     typename GeMatrix<MH>::NoView          H_org    = H;

567     typename GeMatrix<MH>::NoView          Z_org    = Z;

568 

569     typename GeMatrix<MH>::NoView          _H       = H;

570     typename DenseVector<VWR>::NoView      _wr      = wr;

571     typename DenseVector<VWI>::NoView      _wi      = wi;

572     typename GeMatrix<MZ>::NoView          _Z       = Z;

573 #   endif

574 

575 //

576 //  Call implementation

577 //

578     IndexType info = lahqr_generic(wantT, wantZ, iLo,  iHi,

579                                    H, wr, wi, iLoZ, iHiZ, Z);

580 

581 //

582 //  Compare results

583 //

584 #   ifdef CHECK_CXXLAPACK

585     IndexType _info = lahqr_native(wantT, wantZ, iLo,  iHi,

586                                    _H, _wr, _wi, iLoZ, iHiZ, _Z);

587 

588     bool failed = false;

589     if (! isIdentical(H, _H, " H", "_H")) {

590         std::cerr << "CXXLAPACK:  H = " << H << std::endl;

591         std::cerr << "F77LAPACK: _H = " << _H << std::endl;

592         failed = true;

593     }

594 

595     if (! isIdentical(wr, _wr, " wr", "_wr")) {

596         std::cerr << "CXXLAPACK:  wr = " << wr << std::endl;

597         std::cerr << "F77LAPACK: _wr = " << _wr << std::endl;

598         failed = true;

599     }

600 

601     if (! isIdentical(wi, _wi, " wi", "_wi")) {

602         std::cerr << "CXXLAPACK:  wi = " << wi << std::endl;

603         std::cerr << "F77LAPACK: _wi = " << _wi << std::endl;

604         failed = true;

605     }

606 

607     if (! isIdentical(Z, _Z, " Z", "_Z")) {

608         std::cerr << "CXXLAPACK:  Z = " << Z << std::endl;

609         std::cerr << "F77LAPACK: _Z = " << _Z << std::endl;

610         failed = true;

611     }

612 

613     if (! isIdentical(info, _info, " info", "_info")) {

614         std::cerr << "CXXLAPACK:  info = " << info << std::endl;

615         std::cerr << "F77LAPACK: _info = " << _info << std::endl;

616         failed = true;

617     }

618 

619     if (failed) {

620         std::cerr << "H_org = " << H_org << std::endl;

621         std::cerr << "Z_org = " << Z_org << std::endl;

622         std::cerr << "wantT = " << wantT

623                   << ", wantZ = " << wantZ

624                   << ", iLo = " << iLo

625                   << ", iHi = " << iHi

626                   << std::endl;

627         std::cerr << "error in: lahqr.tcc" << std::endl;

628         ASSERT(0);

629     } else {

630 //        std::cerr << "passed: lahqr.tcc" << std::endl;

631     }

632 #   endif

633     return info;

634 }

635 

636 //-- forwarding ----------------------------------------------------------------

637 template <typename IndexType, typename MH, typename VWR, typename VWI,

638           typename MZ>

639 IndexType

640 lahqr(bool          wantT,

641       bool          wantZ,

642       IndexType     iLo,

643       IndexType     iHi,

644       MH            &&H,

645       VWR           &&wr,

646       VWI           &&wi,

647       IndexType     iLoZ,

648       IndexType     iHiZ,

649       MZ            &&Z)

650 {

651     return lahqr(wantT, wantZ, iLo, iHi, H, wr, wi, iLoZ, iHiZ, Z);

652 }

653 

654 } } // namespace lapack, flens

655 

656 #endif // FLENS_LAPACK_EIG_LAHQR_TCC