laexc.tcc (flens/lapack/eig/laexc.tcc)

  1 /*

  2  *   Copyright (c) 2011, Michael Lehn

  3  *

  4  *   All rights reserved.

  5  *

  6  *   Redistribution and use in source and binary forms, with or without

  7  *   modification, are permitted provided that the following conditions

  8  *   are met:

  9  *

 10  *   1) Redistributions of source code must retain the above copyright

 11  *      notice, this list of conditions and the following disclaimer.

 12  *   2) Redistributions in binary form must reproduce the above copyright

 13  *      notice, this list of conditions and the following disclaimer in

 14  *      the documentation and/or other materials provided with the

 15  *      distribution.

 16  *   3) Neither the name of the FLENS development group nor the names of

 17  *      its contributors may be used to endorse or promote products derived

 18  *      from this software without specific prior written permission.

 19  *

 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 31  */

 32 

 33 /* Based on

 34  *

 35        SUBROUTINE DLAEXC( WANTQ, N, T, LDT, Q, LDQ, J1, N1, N2, WORK,

 36       $                   INFO )

 37  *

 38  *  -- LAPACK auxiliary routine (version 3.2.2) --

 39  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --

 40  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--

 41  *     June 2010

 42  */

 43 

 44 #ifndef FLENS_LAPACK_EIG_LAEXC_TCC

 45 #define FLENS_LAPACK_EIG_LAEXC_TCC 1

 46 

 47 #include <flens/aux/aux.h>

 48 #include <flens/blas/blas.h>

 49 #include <flens/lapack/lapack.h>

 50 

 51 namespace flens { namespace lapack {

 52 

 53 //== generic lapack implementation =============================================

 54 

 55 template <typename MT, typename MQ, typename IndexType, typename VWORK>

 56 IndexType

 57 laexc_generic(bool                          computeQ,

 58               GeMatrix<MT>                  &T,

 59               GeMatrix<MQ>                  &Q,

 60               IndexType                     j1,

 61               IndexType                     n1,

 62               IndexType                     n2,

 63               DenseVector<VWORK>            &work)

 64 {

 65     using std::abs;

 66 

 67     typedef typename GeMatrix<MT>::View         GeMatrixView;

 68     typedef typename GeMatrix<MT>::VectorView   DenseVectorView;

 69     typedef typename GeMatrix<MT>::ElementType  ElementType;

 70 

 71     const ElementType Zero(0), One(1), Ten(10);

 72 

 73     const IndexType n = T.numRows();

 74 

 75     const Underscore<IndexType>  _;

 76 

 77 //

 78 //    .. Local Arrays ..

 79 //

 80     ElementType _dData[16], _xData[4];

 81     GeMatrixView  D = typename GeMatrixView::Engine(4, 4, _dData, 4);

 82     GeMatrixView  X = typename GeMatrixView::Engine(2, 2, _xData, 2);

 83 

 84     ElementType _uData[3], _u1Data[3], _u2Data[3];

 85     DenseVectorView u  = typename DenseVectorView::Engine(3, _uData);

 86     DenseVectorView u1 = typename DenseVectorView::Engine(3, _u1Data);

 87     DenseVectorView u2 = typename DenseVectorView::Engine(3, _u2Data);

 88 //

 89 //  Quick return if possible

 90 //

 91     if (n==0 || n1==0 || n2==0) {

 92         return 0;

 93     }

 94     if (j1+n1>n) {

 95         return 0;

 96     }

 97 

 98     const IndexType j2 = j1 + 1;

 99     const IndexType j3 = j1 + 2;

100     const IndexType j4 = j1 + 3;

101 

102     ElementType t11, t22, t33;

103 

104     if (n1==1 && n2==1) {

105 //

106 //      Swap two 1-by-1 blocks.

107 //

108         t11 = T(j1,j1);

109         t22 = T(j2,j2);

110 //

111 //      Determine the transformation to perform the interchange.

112 //

113         ElementType cs, sn, temp;

114         lartg(T(j1,j2), t22-t11, cs, sn, temp);

115 //

116 //      Apply transformation to the matrix T.

117 //

118         if (j3<=n) {

119             blas::rot(T(j1,_(j3,n)), T(j2,_(j3,n)), cs, sn);

120         }

121         blas::rot(T(_(1,j1-1),j1), T(_(1,j1-1),j2), cs, sn);

122 

123         T(j1,j1) = t22;

124         T(j2,j2) = t11;

125 

126         if (computeQ) {

127 //

128 //          Accumulate transformation in the matrix Q.

129 //

130             blas::rot(Q(_,j1), Q(_,j2), cs, sn);

131         }

132 

133     } else {

134 //

135 //      Swapping involves at least one 2-by-2 block.

136 //

137 //      Copy the diagonal block of order N1+N2 to the local array D

138 //      and compute its norm.

139 //

140         const IndexType nd = n1 + n2;

141         auto _D = D(_(1,nd),_(1,nd));

142 

143         _D = T(_(j1,j1+nd-1),_(j1,j1+nd-1));

144         ElementType normD = lan(MaximumNorm, _D);

145 

146         ElementType cs, sn, wr1, wr2, wi1, wi2;

147 

148         ElementType scale, normX, tau, tau1, tau2;

149 //

150 //      Compute machine-dependent threshold for test for accepting

151 //      swap.

152 //

153         const ElementType eps = lamch<ElementType>(Precision);

154         const ElementType smallNum = lamch<ElementType>(SafeMin) / eps;

155         const ElementType thresh = max(Ten*eps*normD, smallNum);

156 //

157 //      Solve T11*X - X*T22 = scale*T12 for X.

158 //

159         const auto T11 = D(_(1,n1),_(1,n1));

160         const auto T12 = D(_(1,n1),_(n1+1,nd));

161         const auto T22 = D(_(n1+1,nd),_(n1+1,nd));

162 

163         auto _X = X(_(1,n1),_(1,n2));

164 

165         lasy2(false, false, IndexType(-1), T11, T22, T12, scale, _X, normX);

166 //

167 //      Swap the adjacent diagonal blocks.

168 //

169         const IndexType k = n1 + n1 + n2 - 3;

170 

171         switch (k) {

172 //

173 //          N1 = 1, N2 = 2: generate elementary reflector H so that:

174 //

175 //          ( scale, X11, X12 ) H = ( 0, 0, * )

176 //

177         case 1:

178             u(1) = scale;

179             u(2) = X(1,1);

180             u(3) = X(1,2);

181             larfg(IndexType(3), u(3), u(_(1,2)), tau);

182             u(3) = One;

183             t11 = T(j1,j1);

184 //

185 //          Perform swap provisionally on diagonal block in D.

186 //

187             larfx(Left, u, tau, _D, work(_(1,3)));

188             larfx(Right, u, tau, _D, work(_(1,3)));

189 //

190 //          Test whether to reject swap.

191 //

192             if (max(abs(D(3,1)), abs(D(3,2)), abs(D(3,3)-t11))>thresh) {

193 //

194 //              Return 1 if swap was rejected.

195 //

196                 return 1;

197             }

198 //

199 //          Accept swap: apply transformation to the entire matrix T.

200 //

201             larfx(Left, u, tau, T(_(j1,j1+3-1),_(j1,n)), work(_(1,n-j1+1)));

202             larfx(Right, u, tau, T(_(1,j2),_(j1,j1+3-1)), work(_(1,j2)));

203 

204             T(j3,j1) = Zero;

205             T(j3,j2) = Zero;

206             T(j3,j3) = t11;

207 

208             if (computeQ) {

209 //

210 //              Accumulate transformation in the matrix Q.

211 //

212                 larfx(Right, u, tau, Q(_,_(j1,j1+3-1)), work);

213             }

214             break;

215 

216         case 2:

217 //

218 //          N1 = 2, N2 = 1: generate elementary reflector H so that:

219 //

220 //          H (  -X11 ) = ( * )

221 //            (  -X21 ) = ( 0 )

222 //            ( scale ) = ( 0 )

223 //

224             u(1) = -X(1,1);

225             u(2) = -X(2,1);

226             u(3) = scale;

227             larfg(IndexType(3), u(1), u(_(2,3)), tau);

228             u(1) = One;

229             t33 = T(j3,j3);

230 //

231 //          Perform swap provisionally on diagonal block in D.

232 //

233             larfx(Left, u, tau, D(_(1,3),_(1,3)), work(_(1,3)));

234             larfx(Right, u, tau, D(_(1,3),_(1,3)), work(_(1,3)));

235 //

236 //          Test whether to reject swap.

237 //

238             if (max(abs(D(2,1)), abs(D(3,1)), abs(D(1,1)-t33))>thresh) {

239 //

240 //              Return 1 if swap was rejected.

241 //

242                 return 1;

243             }

244 //

245 //          Accept swap: apply transformation to the entire matrix T.

246 //

247             larfx(Right, u, tau, T(_(1,j3),_(j1, j1+3-1)), work(_(1,j3)));

248             larfx(Left, u, tau, T(_(j1,j1+3-1),_(j2,n)), work(_(1,n-j1)));

249 

250             T(j1,j1) = t33;

251             T(j2,j1) = Zero;

252             T(j3,j1) = Zero;

253 

254             if (computeQ) {

255 //

256 //              Accumulate transformation in the matrix Q.

257 //

258                 larfx(Right, u, tau, Q(_,_(j1,j1+3-1)), work);

259             }

260             break;

261 

262         case 3:

263 //

264 //          N1 = 2, N2 = 2: generate elementary reflectors H(1) and H(2) so

265 //          that:

266 //

267 //          H(2) H(1) (  -X11  -X12 ) = (  *  * )

268 //                    (  -X21  -X22 )   (  0  * )

269 //                    ( scale    0  )   (  0  0 )

270 //                    (    0  scale )   (  0  0 )

271 //

272             u1(1) = -X(1,1);

273             u1(2) = -X(2,1);

274             u1(3) = scale;

275             larfg(IndexType(3), u1(1), u1(_(2,3)), tau1);

276             u1(1) = One;

277 

278             const ElementType temp = -tau1*(X(1,2)+u1(2)*X(2,2));

279             u2(1) = -temp*u1(2) - X(2,2);

280             u2(2) = -temp*u1(3);

281             u2(3) = scale;

282             larfg(IndexType(3), u2(1), u2(_(2,3)), tau2);

283             u2(1) = One;

284 //

285 //          Perform swap provisionally on diagonal block in D.

286 //

287             larfx(Left,  u1, tau1, D(_(1,3),_(1,4)), work(_(1,4)));

288             larfx(Right, u1, tau1, D(_(1,4),_(1,3)), work(_(1,4)));

289             larfx(Left,  u2, tau2, D(_(2,4),_(1,4)), work(_(1,4)));

290             larfx(Right, u2, tau2, D(_(1,4),_(2,4)), work(_(1,4)));

291 //

292 //          Test whether to reject swap.

293 //

294             if (max(abs(D(3,1)), abs(D(3,2)), abs(D(4,1)), abs(D(4,2)))>thresh)

295             {

296 //

297 //              Return 1 if swap was rejected.

298 //

299                 return 1;

300             }

301 //

302 //          Accept swap: apply transformation to the entire matrix T.

303 //

304             larfx(Left,  u1, tau1, T(_(j1,j1+3-1),_(j1,n)), work(_(1,n-j1+1)));

305             larfx(Right, u1, tau1, T(_(1,j4),_(j1,j1+3-1)), work(_(1,j4)));

306             larfx(Left,  u2, tau2, T(_(j2,j2+3-1),_(j1,n)), work(_(1,n-j1+1)));

307             larfx(Right, u2, tau2, T(_(1,j4),_(j2,j2+3-1)), work(_(1,j4)));

308 

309             T(j3,j1) = Zero;

310             T(j3,j2) = Zero;

311             T(j4,j1) = Zero;

312             T(j4,j2) = Zero;

313 

314             if (computeQ) {

315 //

316 //              Accumulate transformation in the matrix Q.

317 //

318                 larfx(Right, u1, tau1, Q(_,_(j1,j1+3-1)), work);

319                 larfx(Right, u2, tau2, Q(_,_(j2,j2+3-1)), work);

320             }

321         }

322 

323         if (n2==2) {

324 //

325 //          Standardize new 2-by-2 block T11

326 //

327             lanv2(T(j1,j1), T(j1,j2), T(j2,j1), T(j2,j2),

328                   wr1, wi1, wr2, wi2, cs, sn);

329             blas::rot(T(j1,_(j1+2,n)), T(j2,_(j1+2,n)), cs, sn);

330             blas::rot(T(_(1,j1-1),j1), T(_(1,j1-1),j2), cs, sn);

331             if (computeQ) {

332                 blas::rot(Q(_,j1), Q(_,j2), cs, sn);

333             }

334         }

335 

336         if (n1==2) {

337 //

338 //          Standardize new 2-by-2 block T22

339 //

340             const IndexType j3 = j1 + n2;

341             const IndexType j4 = j3 + 1;

342             lanv2(T(j3,j3), T(j3,j4), T(j4,j3), T(j4,j4),

343                   wr1, wi1, wr2, wi2, cs, sn);

344             if (j3+2<=n) {

345                 blas::rot(T(j3,_(j3+2,n)), T(j4,_(j3+2,n)), cs, sn);

346             }

347             blas::rot(T(_(1,j3-1),j3), T(_(1,j3-1),j4), cs, sn);

348             if (computeQ) {

349                 blas::rot(Q(_,j3), Q(_,j4), cs, sn);

350             }

351         }

352     }

353     return 0;

354 }

355 

356 //== interface for native lapack ===============================================

357 

358 #ifdef CHECK_CXXLAPACK

359 

360 template <typename MT, typename MQ, typename IndexType, typename VWORK>

361 IndexType

362 laexc_native(bool                          computeQ,

363              GeMatrix<MT>                  &T,

364              GeMatrix<MQ>                  &Q,

365              IndexType                     j1,

366              IndexType                     n1,

367              IndexType                     n2,

368              DenseVector<VWORK>            &work)

369 {

370     typedef typename GeMatrix<MT>::ElementType ElementType;

371 

372     const LOGICAL   WANTQ  = computeQ;

373     const INTEGER   N      = T.numRows();

374     const INTEGER   LDT    = T.leadingDimension();

375     const INTEGER   LDQ    = Q.leadingDimension();

376     const INTEGER   J1     = j1;

377     const INTEGER   N1     = n1;

378     const INTEGER   N2     = n2;

379     INTEGER         INFO;

380 

381     if (IsSame<ElementType,DOUBLE>::value) {

382         LAPACK_IMPL(dlaexc)(&WANTQ,

383                             &N,

384                             T.data(),

385                             &LDT,

386                             Q.data(),

387                             &LDQ,

388                             &J1,

389                             &N1,

390                             &N2,

391                             work.data(),

392                             &INFO);

393     } else {

394         ASSERT(0);

395     }

396     ASSERT(INFO>=0);

397 

398     return INFO;

399 }

400 

401 #endif // CHECK_CXXLAPACK

402 

403 //== public interface ==========================================================

404 

405 template <typename MT, typename MQ, typename IndexType, typename VWORK>

406 IndexType

407 laexc(bool                          computeQ,

408       GeMatrix<MT>                  &T,

409       GeMatrix<MQ>                  &Q,

410       IndexType                     j1,

411       IndexType                     n1,

412       IndexType                     n2,

413       DenseVector<VWORK>            &work)

414 {

415     LAPACK_DEBUG_OUT("BEGIN: laexc");

416 

417 //

418 //  Test the input parameters

419 //

420 #   ifndef NDEBUG

421     ASSERT(T.firstRow()==1);

422     ASSERT(T.firstCol()==1);

423     ASSERT(T.numRows()==T.numCols());

424 

425     const IndexType n = T.numRows();

426 

427     if (computeQ) {

428         ASSERT(Q.firstRow()==1);

429         ASSERT(Q.firstCol()==1);

430         ASSERT(Q.numRows()==Q.numCols());

431         ASSERT(Q.numRows()==n);

432     }

433 

434     ASSERT(j1>=1);

435     ASSERT((n1==0) || (n1==1) || (n1==2));

436     ASSERT((n2==0) || (n2==1) || (n2==2));

437 

438     ASSERT(work.firstIndex()==1);

439     ASSERT(work.length()==n);

440 #   endif

441 

442 #   ifdef CHECK_CXXLAPACK

443 //

444 //  Make copies of output arguments

445 //

446     typename GeMatrix<MT>::NoView           T_org      = T;

447     typename GeMatrix<MQ>::NoView           Q_org      = Q;

448     typename DenseVector<VWORK>::NoView     work_org   = work;

449 #   endif

450 

451 //

452 //  Call implementation

453 //

454     IndexType info = laexc_generic(computeQ, T, Q, j1, n1, n2, work);

455 

456 #   ifdef CHECK_CXXLAPACK

457 //

458 //  Make copies of results computed by the generic implementation

459 //

460     typename GeMatrix<MT>::NoView           T_generic       = T;

461     typename GeMatrix<MQ>::NoView           Q_generic       = Q;

462     typename DenseVector<VWORK>::NoView     work_generic    = work;

463 

464 //

465 //  restore output arguments

466 //

467     T = T_org;

468     Q = Q_org;

469     work = work_org;

470 

471 //

472 //  Compare generic results with results from the native implementation

473 //

474 

475     IndexType _info = laexc_native(computeQ, T, Q, j1, n1, n2, work);

476 

477     bool failed = false;

478     if (! isIdentical(T_generic, T, "T_generic", "T")) {

479         std::cerr << "CXXLAPACK: T_generic = " << T_generic << std::endl;

480         std::cerr << "F77LAPACK: T = " << T << std::endl;

481         failed = true;

482     }

483 

484     if (! isIdentical(Q_generic, Q, "Q_generic", "Q")) {

485         std::cerr << "CXXLAPACK: Q_generic = " << Q_generic << std::endl;

486         std::cerr << "F77LAPACK: Q = " << Q << std::endl;

487         failed = true;

488     }

489 

490     if (! isIdentical(work_generic, work, "work_generic", "work")) {

491         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;

492         std::cerr << "F77LAPACK: work = " << work << std::endl;

493         failed = true;

494     }

495 

496     if (! isIdentical(info, _info, " info", "_info")) {

497         std::cerr << "CXXLAPACK:  info = " << info << std::endl;

498         std::cerr << "F77LAPACK: _info = " << _info << std::endl;

499         failed = true;

500     }

501 

502     if (failed) {

503         ASSERT(0);

504     }

505 #   endif

506 

507     LAPACK_DEBUG_OUT("END: laexc");

508 

509     return info;

510 }

511 

512 //-- forwarding ----------------------------------------------------------------

513 template <typename MT, typename MQ, typename IndexType, typename VWORK>

514 IndexType

515 laexc(bool              computeQ,

516       MT                &&T,

517       MQ                &&Q,

518       IndexType         j1,

519       IndexType         n1,

520       IndexType         n2,

521       VWORK             &&work)

522 {

523     CHECKPOINT_ENTER;

524     const IndexType info = laexc(computeQ, T, Q, j1, n1, n2, work);

525     CHECKPOINT_LEAVE;

526 

527     return info;

528 }

529 

530 } } // namespace lapack, flens

531 

532 #endif // FLENS_LAPACK_EIG_LAEVC_TCC