1 /*
   2  *   Copyright (c) 2011, Michael Lehn
   3  *
   4  *   All rights reserved.
   5  *
   6  *   Redistribution and use in source and binary forms, with or without
   7  *   modification, are permitted provided that the following conditions
   8  *   are met:
   9  *
  10  *   1) Redistributions of source code must retain the above copyright
  11  *      notice, this list of conditions and the following disclaimer.
  12  *   2) Redistributions in binary form must reproduce the above copyright
  13  *      notice, this list of conditions and the following disclaimer in
  14  *      the documentation and/or other materials provided with the
  15  *      distribution.
  16  *   3) Neither the name of the FLENS development group nor the names of
  17  *      its contributors may be used to endorse or promote products derived
  18  *      from this software without specific prior written permission.
  19  *
  20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31  */
  32 
  33 /* Based on
  34  *
  35        SUBROUTINE DTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR,
  36       $                   LDVR, MM, M, WORK, INFO )
  37  *
  38  *  -- LAPACK routine (version 3.3.1) --
  39  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  40  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  41  *  -- April 2011                                                      --
  42  */
  43 
  44 #ifndef FLENS_LAPACK_EIG_TREVC_TCC
  45 #define FLENS_LAPACK_EIG_TREVC_TCC 1
  46 
  47 #include <flens/aux/aux.h>
  48 #include <flens/blas/blas.h>
  49 #include <flens/lapack/lapack.h>
  50 
  51 namespace flens { namespace lapack {
  52 
  53 //== generic lapack implementation =============================================
  54 
  55 template <typename VSELECT, typename MT, typename MVL, typename MVR,
  56           typename IndexType, typename VWORK>
  57 void
  58 trevc_generic(bool                          computeVL,
  59               bool                          computeVR,
  60               TREVC::Job                    howMany,
  61               DenseVector<VSELECT>          &select,
  62               const GeMatrix<MT>            &T,
  63               GeMatrix<MVL>                 &VL,
  64               GeMatrix<MVR>                 &VR,
  65               IndexType                     mm,
  66               IndexType                     &m,
  67               DenseVector<VWORK>            &work)
  68 {
  69 
  70     using std::abs;
  71     using flens::max;
  72 
  73     typedef typename GeMatrix<MT>::ElementType  ElementType;
  74 
  75     // TODO: define a colmajor GeMatrixView
  76     typedef typename GeMatrix<MT>::View         GeMatrixView;
  77 
  78     const ElementType   Zero(0), One(1);
  79 
  80     const Underscore<IndexType> _;
  81     const IndexType n = T.numCols();
  82 
  83 //
  84 //    .. Local Arrays ..
  85 //
  86     ElementType  _xData[4];
  87     GeMatrixView X = typename GeMatrixView::Engine(22, _xData, 2);
  88 
  89     GeMatrixView Work(n, 3, work, n);
  90 
  91 //
  92 //  Decode and test the input parameters
  93 //
  94     const bool over     = howMany==TREVC::Backtransform;
  95     const bool someV    = howMany==TREVC::Selected;
  96 
  97 //
  98 //  Set M to the number of columns required to store the selected
  99 //  eigenvectors, standardize the array SELECT if necessary, and
 100 //  test MM.
 101 //
 102     if (someV) {
 103         m = 0;
 104         bool pair = false;
 105         for (IndexType j=1; j<=n; ++j) {
 106             if (pair) {
 107                 pair        = false;
 108                 select(j)   = false;
 109             } else {
 110                 if (j<n) {
 111                     if (T(j+1,j)==Zero) {
 112                         if (select(j)) {
 113                             ++m;
 114                         }
 115                     } else {
 116                         pair = true;
 117                         if (select(j) || select(j+1)) {
 118                             select(j) = true;
 119                             m += 2;
 120                         }
 121                     }
 122                 } else {
 123                     if (select(n)) {
 124                         ++m;
 125                     }
 126                 }
 127             }
 128         }
 129     } else {
 130         m = n;
 131     }
 132 
 133     if (mm<m) {
 134         ASSERT(0);
 135     }
 136 //
 137 //   Quick return if possible.
 138 //
 139     if (n==0) {
 140         return;
 141     }
 142 //
 143 //  Set the constants to control overflow.
 144 //
 145     ElementType underflow = lamch<ElementType>(SafeMin);
 146     ElementType overflow  = One / underflow;
 147     labad(underflow, overflow);
 148 
 149     const ElementType ulp         = lamch<ElementType>(Precision);
 150     const ElementType smallNum    = underflow*(n/ulp);
 151     const ElementType bigNum      = (One-ulp)/smallNum;
 152 //
 153 //  Compute 1-norm of each column of strictly upper triangular
 154 //  part of T to control overflow in triangular solver.
 155 //
 156     work(1) = Zero;
 157     for (IndexType j=2; j<=n; ++j) {
 158         work(j) = Zero;
 159         for (IndexType i=1; i<=j-1; ++i) {
 160             work(j) += abs(T(i,j));
 161         }
 162     }
 163 //
 164 //  Index IP is used to specify the real or complex eigenvalue:
 165 //    IP = 0, real eigenvalue,
 166 //         1, first of conjugate complex pair: (wr,wi)
 167 //        -1, second of conjugate complex pair: (wr,wi)
 168 //
 169     const IndexType n2 = 2*n;
 170 
 171     ElementType scale, xNorm, wr, wi;
 172 
 173     if (computeVR) {
 174 //
 175 //      Compute right eigenvectors.
 176 //
 177 
 178         IndexType ip = 0;
 179         IndexType is = m;
 180         for (IndexType ki=n; ki>=1; --ki) {
 181 
 182             if (ip==1) {
 183                 ip = 0;
 184                 continue;
 185             }
 186             if (ki!=1 && T(ki,ki-1)!=Zero) {
 187                 ip = -1;
 188             }
 189 
 190             if (someV) {
 191                 if (ip==0) {
 192                     if (!select(ki)) {
 193                         if (ip==1) {
 194                             ip = 0;
 195                         }
 196                         if (ip==-1) {
 197                             ip = 1;
 198                         }
 199                         continue;
 200                     }
 201                 } else {
 202                     if (!select(ki-1)) {
 203                         if (ip==1) {
 204                             ip = 0;
 205                         }
 206                         if (ip==-1) {
 207                             ip = 1;
 208                         }
 209                         continue;
 210                     }
 211                 }
 212             }
 213 //
 214 //          Compute the KI-th eigenvalue (WR,WI).
 215 //
 216             wr = T(ki,ki);
 217             wi = Zero;
 218             if (ip!=0) {
 219                 wi = sqrt(abs(T(ki,ki-1)))*sqrt(abs(T(ki-1,ki)));
 220             }
 221             const ElementType safeMin = max(ulp*(abs(wr)+abs(wi)), smallNum);
 222 
 223             if (ip==0) {
 224 //
 225 //              Real right eigenvector
 226 //
 227                 work(ki+n) = One;
 228 //
 229 //              Form right-hand side
 230 //              TODO: do this by work(_(...)) = -T(_(...),ki)
 231 //
 232                 for (IndexType k=1; k<=ki-1; ++k) {
 233                     work(k+n) = -T(k,ki);
 234                 }
 235 //
 236 //              Solve the upper quasi-triangular system:
 237 //                 (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK.
 238 //
 239                 IndexType jNext = ki - 1;
 240                 for (IndexType j=ki-1; j>=1; --j) {
 241                     if (j>jNext) {
 242                         continue;
 243                     }
 244                     IndexType j1 = j;
 245                     IndexType j2 = j;
 246                     jNext = j - 1;
 247                     if (j>1) {
 248                         if (T(j,j-1)!=Zero) {
 249                             j1 = j - 1;
 250                             jNext = j - 2;
 251                         }
 252                     }
 253 
 254                     if (j1==j2) {
 255 //
 256 //                      1-by-1 diagonal block
 257 //
 258                         laln2(false,
 259                               IndexType(1),
 260                               safeMin,
 261                               One,
 262                               T(_(j,j),_(j,j)),
 263                               One,
 264                               One,
 265                               Work(_(j,j), _(2,2)),
 266                               wr,
 267                               Zero,
 268                               X(_(1,1),_(1,1)),
 269                               scale,
 270                               xNorm);
 271 //
 272 //                      Scale X(1,1) to avoid overflow when updating
 273 //                      the right-hand side.
 274 //
 275                         if (xNorm>One) {
 276                             if (work(j)>bigNum/xNorm) {
 277                                 X(1,1)  /= xNorm;
 278                                 scale   /= xNorm;
 279                             }
 280                         }
 281 //
 282 //                      Scale if necessary
 283 //
 284                         if (scale!=One) {
 285                             work(_(1+n,ki+n)) *= scale;
 286                         }
 287                         work(j+n) = X(1,1);
 288 //
 289 //                      Update right-hand side
 290 //
 291                         work(_(1+n,j-1+n)) -= X(1,1)*T(_(1,j-1),j);
 292 
 293                     } else {
 294 //
 295 //                      2-by-2 diagonal block
 296 //
 297                         laln2(false,
 298                               IndexType(1),
 299                               safeMin,
 300                               One,
 301                               T(_(j-1,j),_(j-1,j)),
 302                               One,
 303                               One,
 304                               Work(_(j-1,j), _(2,2)),
 305                               wr,
 306                               Zero,
 307                               X(_(1,2),_(1,1)),
 308                               scale,
 309                               xNorm);
 310 
 311 //
 312 //                      Scale X(1,1) and X(2,1) to avoid overflow when
 313 //                      updating the right-hand side.
 314 //
 315                         if (xNorm>One) {
 316                             const ElementType beta = max(work(j-1), work(j));
 317                             if (beta>bigNum/xNorm) {
 318                                 X(1,1) /= xNorm;
 319                                 X(2,1) /= xNorm;
 320                                 scale  /= xNorm;
 321                             }
 322                         }
 323 //
 324 //                      Scale if necessary
 325 //
 326                         if (scale!=One) {
 327                             work(_(1+n,ki+n)) *= scale;
 328                         }
 329                         work(j-1+n) = X(1,1);
 330                         work(j+n)   = X(2,1);
 331 //
 332 //                      Update right-hand side
 333 //
 334                         work(_(1+n,j-2+n)) -= X(1,1)*T(_(1,j-2),j-1);
 335                         work(_(1+n,j-2+n)) -= X(2,1)*T(_(1,j-2),j);
 336                     }
 337                 }
 338 //
 339 //              Copy the vector x or Q*x to VR and normalize.
 340 //
 341                 if (!over) {
 342                     VR(_(1,ki),is) = work(_(1+n,ki+n));
 343 
 344                     const IndexType ii = blas::iamax(VR(_(1,ki),is));
 345                     const ElementType reMax = One / abs(VR(ii,is));
 346                     VR(_(1,ki),is) *= reMax;
 347 
 348                     VR(_(ki+1,n),is) = Zero;
 349                 } else {
 350                     if (ki>1) {
 351                         blas::mv(NoTrans, One,
 352                                  VR(_,_(1,ki-1)),
 353                                  work(_(1+n,n+ki-1)),
 354                                  work(ki+n),
 355                                  VR(_,ki));
 356                     }
 357                     const IndexType ii = blas::iamax(VR(_,ki));
 358                     const ElementType reMax = One / abs(VR(ii,ki));
 359                     VR(_,ki) *= reMax;
 360                 }
 361 
 362             } else {
 363 //
 364 //              Complex right eigenvector.
 365 //
 366 //              Initial solve
 367 //                [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0.
 368 //                [ (T(KI,KI-1)   T(KI,KI)   )               ]
 369 //
 370                 if (abs(T(ki-1,ki))>=abs(T(ki,ki-1))) {
 371                     work(ki-1+n)  = One;
 372                     work(ki+n2)   = wi / T(ki-1,ki);
 373                 } else {
 374                     work(ki-1+n ) = -wi / T(ki,ki-1);
 375                     work(ki+n2 )  = One;
 376                 }
 377                 work(ki+n) = Zero;
 378                 work(ki-1+n2) = Zero;
 379 //
 380 //              Form right-hand side
 381 //
 382                 for (IndexType k=1; k<=ki-2; ++k) {
 383                     work(k+n)   = -work(ki-1+n) * T(k,ki-1);
 384                     work(k+n2)  = -work(ki+n2)  * T(k,ki);
 385                 }
 386 //
 387 //              Solve upper quasi-triangular system:
 388 //              (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2)
 389 //
 390                 IndexType jNext = ki - 2;
 391                 for (IndexType j=ki-2; j>=1; --j) {
 392                     if (j>jNext) {
 393                         continue;
 394                     }
 395                     IndexType j1 = j;
 396                     IndexType j2 = j;
 397                     jNext = j - 1;
 398                     if (j>1) {
 399                         if (T(j,j-1)!=Zero) {
 400                             j1 = j - 1;
 401                             jNext = j - 2;
 402                         }
 403                     }
 404 
 405                     if (j1==j2) {
 406 //
 407 //                      1-by-1 diagonal block
 408 //
 409                         laln2(false,
 410                               IndexType(2),
 411                               safeMin,
 412                               One,
 413                               T(_(j,j),_(j,j)),
 414                               One,
 415                               One,
 416                               Work(_(j,j), _(2,3)),
 417                               wr,
 418                               wi,
 419                               X(_(1,1),_(1,2)),
 420                               scale,
 421                               xNorm);
 422 //
 423 //                      Scale X(1,1) and X(1,2) to avoid overflow when
 424 //                      updating the right-hand side.
 425 //
 426                         if (xNorm>One) {
 427                             if (work(j)>bigNum/xNorm) {
 428                                 X(1,1) /= xNorm;
 429                                 X(1,2) /= xNorm;
 430                                 scale /= xNorm;
 431                             }
 432                         }
 433 //
 434 //                      Scale if necessary
 435 //
 436                         if (scale!=One) {
 437                             work(_(1+n,ki+n))   *= scale;
 438                             work(_(1+n2,ki+n2)) *= scale;
 439                         }
 440                         work(j+n)   = X(1,1);
 441                         work(j+n2)  = X(1,2);
 442 //
 443 //                      Update the right-hand side
 444 //
 445                         work(_(1+n,j-1+n))   -= X(1,1)*T(_(1,j-1),j);
 446                         work(_(1+n2,j-1+n2)) -= X(1,2)*T(_(1,j-1),j);
 447 
 448                     } else {
 449 //
 450 //                      2-by-2 diagonal block
 451 //
 452                         laln2(false,
 453                               IndexType(2),
 454                               safeMin,
 455                               One,
 456                               T(_(j-1,j),_(j-1,j)),
 457                               One,
 458                               One,
 459                               Work(_(j-1,j), _(2,3)),
 460                               wr,
 461                               wi,
 462                               X(_(1,2),_(1,2)),
 463                               scale,
 464                               xNorm);
 465 
 466 //
 467 //                      Scale X to avoid overflow when updating
 468 //                      the right-hand side.
 469 //
 470                         if (xNorm>One) {
 471                             const ElementType beta = max(work(j-1), work(j));
 472                             if (beta>bigNum/xNorm) {
 473                                 const ElementType rec = One / xNorm;
 474                                 X(1,1) *= rec;
 475                                 X(1,2) *= rec;
 476                                 X(2,1) *= rec;
 477                                 X(2,2) *= rec;
 478                                 scale *= rec;
 479                             }
 480                         }
 481 //
 482 //                      Scale if necessary
 483 //
 484                         if (scale!=One) {
 485                             work(_(1+n,ki+n))   *= scale;
 486                             work(_(1+n2,ki+n2)) *= scale;
 487                         }
 488                         work(j-1+n)  = X(1,1);
 489                         work(j+n)    = X(2,1);
 490                         work(j-1+n2) = X(1,2);
 491                         work(j+n2)   = X(2,2);
 492 //
 493 //                      Update the right-hand side
 494 //
 495                         work(_(1+n,j-2+n)) -= X(1,1)*T(_(1,j-2),j-1);
 496                         work(_(1+n,j-2+n)) -= X(2,1)*T(_(1,j-2),j);
 497 
 498                         work(_(1+n2,j-2+n2)) -= X(1,2)*T(_(1,j-2),j-1);
 499                         work(_(1+n2,j-2+n2)) -= X(2,2)*T(_(1,j-2),j);
 500                     }
 501                 }
 502 //
 503 //              Copy the vector x or Q*x to VR and normalize.
 504 //
 505                 if (!over) {
 506                     VR(_(1,ki),is-1) = work(_(1+n,ki+n));
 507                     VR(_(1,ki),is)   = work(_(1+n2,ki+n2));
 508 
 509                     ElementType eMax = Zero;
 510                     for (IndexType k=1; k<=ki; ++k) {
 511                         eMax = max(eMax, abs(VR(k,is-1))+abs(VR(k,is)));
 512                     }
 513 
 514                     const ElementType reMax = One / eMax;
 515                     VR(_(1,ki),is-1)  *= reMax; 
 516                     VR(_(1,ki),is)    *= reMax; 
 517 
 518                     VR(_(ki+1,n),is-1)  = Zero;
 519                     VR(_(ki+1,n),is)    = Zero;
 520 
 521                 } else {
 522 
 523                     if (ki>2) {
 524                         blas::mv(NoTrans, One,
 525                                  VR(_,_(1,ki-2)),
 526                                  work(_(1+n,ki-2+n)),
 527                                  work(ki-1+n),
 528                                  VR(_,ki-1));
 529                         blas::mv(NoTrans, One,
 530                                  VR(_,_(1,ki-2)),
 531                                  work(_(n2+1,n2+ki-2)),
 532                                  work(ki+n2),
 533                                  VR(_,ki));
 534                     } else {
 535                         VR(_,ki-1)  *= work(ki-1+n);
 536                         VR(_,ki)    *= work(ki+n2);
 537                     }
 538 
 539                     ElementType eMax = Zero;
 540                     for (IndexType k=1; k<=n; ++k) {
 541                         eMax = max(eMax, abs(VR(k,ki-1)) + abs(VR(k,ki)));
 542                     }
 543                     const ElementType reMax = One / eMax;
 544                     VR(_,ki-1)  *= reMax;
 545                     VR(_,ki)    *= reMax;
 546                 }
 547             }
 548 
 549             --is;
 550             if (ip!=0) {
 551                 --is;
 552             }
 553             if (ip==1) {
 554                 ip = 0;
 555             }
 556             if (ip==-1) {
 557                 ip = 1;
 558             }
 559         }
 560     }
 561 
 562     if (computeVL) {
 563 //
 564 //      Compute left eigenvectors.
 565 //
 566         IndexType ip = 0;
 567         IndexType is = 1;
 568         for (IndexType ki=1; ki<=n; ++ki) {
 569 
 570             if (ip==-1) {
 571                 ip = 0;
 572                 continue;
 573             }
 574             if (ki!=n && T(ki+1,ki)!=Zero) {
 575                 ip = 1;
 576             }
 577 
 578             if (someV) {
 579                 if (!select(ki)) {
 580                     if (ip==-1) {
 581                         ip = 0;
 582                     }
 583                     if (ip==1) {
 584                         ip = -1;
 585                     }
 586                     continue;
 587                 }
 588             }
 589 //
 590 //          Compute the KI-th eigenvalue (WR,WI).
 591 //
 592             ElementType wr = T(ki,ki);
 593             ElementType wi = Zero;
 594             if (ip!=0) {
 595                 wi = sqrt(abs(T(ki,ki+1))) * sqrt(abs(T(ki+1,ki)));
 596             }
 597             const ElementType safeMin = max(ulp*(abs(wr)+abs(wi)), smallNum);
 598 
 599             if (ip==0) {
 600 //
 601 //              Real left eigenvector.
 602 //
 603                 work(ki+n) = One;
 604 //
 605 //              Form right-hand side
 606 //
 607                 for (IndexType k=ki+1; k<=n; ++k) {
 608                     work(k+n) = -T(ki,k);
 609                 }
 610 //
 611 //              Solve the quasi-triangular system:
 612 //                 (T(KI+1:N,KI+1:N) - WR)**T*X = SCALE*WORK
 613 //
 614                 ElementType vMax = One;
 615                 ElementType vCrit = bigNum;
 616 
 617                 IndexType jNext = ki + 1;
 618                 for (IndexType j=ki+1; j<=n; ++j) {
 619                     if (j<jNext) {
 620                         continue;
 621                     }
 622                     IndexType j1 = j;
 623                     IndexType j2 = j;
 624                     jNext = j + 1;
 625                     if (j<n) {
 626                         if (T(j+1,j)!=Zero) {
 627                             j2 = j + 1;
 628                             jNext = j + 2;
 629                         }
 630                     }
 631 
 632                     if (j1==j2) {
 633 //
 634 //                      1-by-1 diagonal block
 635 //
 636 //                      Scale if necessary to avoid overflow when forming
 637 //                      the right-hand side.
 638 //
 639                         if (work(j)>vCrit) {
 640                             const ElementType rec = One / vMax;
 641                             work(_(ki+n,n2)) *= rec;
 642                             vMax = One;
 643                             vCrit = bigNum;
 644                         }
 645 
 646                         work(j+n) -= T(_(ki+1,j-1),j)*work(_(ki+1+n,n+j-1));
 647 //
 648 //                      Solve (T(J,J)-WR)**T*X = WORK
 649 //
 650                         laln2(false,
 651                               IndexType(1),
 652                               safeMin,
 653                               One,
 654                               T(_(j,j),_(j,j)),
 655                               One,
 656                               One,
 657                               Work(_(j,j), _(2,2)),
 658                               wr,
 659                               Zero,
 660                               X(_(1,1),_(1,1)),
 661                               scale,
 662                               xNorm);
 663 
 664 //
 665 //                      Scale if necessary
 666 //
 667                         if (scale!=One) {
 668                             work(_(ki+n,n2)) *= scale;
 669                         }
 670                         work(j+n) = X(1,1);
 671                         vMax = max(abs(work(j+n)), vMax);
 672                         vCrit = bigNum / vMax;
 673 
 674                     } else {
 675 //
 676 //                      2-by-2 diagonal block
 677 //
 678 //                      Scale if necessary to avoid overflow when forming
 679 //                      the right-hand side.
 680 //
 681                         const ElementType beta = max(work(j), work(j+1));
 682                         if (beta>vCrit) {
 683                             const ElementType rec = One / vMax;
 684                             work(_(ki+n,n2)) *= rec;
 685                             vMax = One;
 686                             vCrit = bigNum;
 687                         }
 688 
 689                         work(j+n)   -= T(_(ki+1,j-1),j)*work(_(ki+1+n,n+j-1));
 690                         work(j+1+n) -= T(_(ki+1,j-1),j+1)*work(_(ki+1+n,n+j-1));
 691 //
 692 //                      Solve
 693 //                       [T(J,J)-WR   T(J,J+1)     ]**T * X = SCALE*( WORK1 )
 694 //                       [T(J+1,J)    T(J+1,J+1)-WR]                ( WORK2 )
 695 //
 696                         laln2(true,
 697                               IndexType(1),
 698                               safeMin,
 699                               One,
 700                               T(_(j,j+1),_(j,j+1)),
 701                               One,
 702                               One,
 703                               Work(_(j,j+1), _(2,2)),
 704                               wr,
 705                               Zero,
 706                               X(_(1,2),_(1,1)),
 707                               scale,
 708                               xNorm);
 709 
 710 //
 711 //                      Scale if necessary
 712 //
 713                         if (scale!=One) {
 714                             work(_(ki+n,n2)) *= scale;
 715                         }
 716                         work(j+n)   = X(1,1);
 717                         work(j+1+n) = X(2,1);
 718 
 719                         vMax = max(abs(work(j+n)), abs(work(j+1+n)), vMax);
 720                         vCrit = bigNum / vMax;
 721                     }
 722                 }
 723 //
 724 //              Copy the vector x or Q*x to VL and normalize.
 725 //
 726                 if (!over) {
 727                     VL(_(ki,n),is) = work(_(ki+n,n2));
 728 
 729                     const IndexType ii = blas::iamax(VL(_(ki,n),is)) + ki - 1;
 730                     const ElementType reMax = One / abs(VL(ii,is));
 731                     VL(_(ki,n),is) *= reMax;
 732 
 733                     VL(_(1,ki-1),is) = Zero;
 734 
 735                 } else {
 736 
 737                     if (ki<n) {
 738                         blas::mv(NoTrans, One,
 739                                  VL(_,_(ki+1,n)),
 740                                  work(_(ki+1+n,n2)),
 741                                  work(ki+n),
 742                                  VL(_,ki));
 743                     }
 744 
 745                     const IndexType ii = blas::iamax(VL(_,ki));
 746                     const ElementType reMax = One / abs(VL(ii,ki));
 747                     VL(_,ki) *= reMax;
 748 
 749                 }
 750 
 751             } else {
 752 //
 753 //              Complex left eigenvector.
 754 //
 755 //              Initial solve:
 756 //                ((T(KI,KI)    T(KI,KI+1) )**T - (WR - I* WI))*X = 0.
 757 //                ((T(KI+1,KI) T(KI+1,KI+1))                )
 758 //
 759                 if (abs(T(ki,ki+1))>=abs(T(ki+1,ki))) {
 760                     work(ki+n)    = wi / T(ki,ki+1);
 761                     work(ki+1+n2) = One;
 762                 } else {
 763                     work(ki+n)    = One;
 764                     work(ki+1+n2) = -wi / T(ki+1,ki);
 765                 }
 766                 work(ki+1+n) = Zero;
 767                 work(ki+n2)  = Zero;
 768 //
 769 //              Form right-hand side
 770 //
 771                 for (IndexType k=ki+2; k<=n; ++k) {
 772                     work(k+n)   = -work(ki+n)*T(ki,k);
 773                     work(k+n2)  = -work(ki+1+n2)*T(ki+1,k);
 774                 }
 775 //
 776 //              Solve complex quasi-triangular system:
 777 //              ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2
 778 //
 779                 ElementType vMax = One;
 780                 ElementType vCrit = bigNum;
 781 
 782                 IndexType jNext = ki + 2;
 783                 for (IndexType j=ki+2; j<=n; ++j) {
 784                     if (j<jNext) {
 785                         continue;
 786                     }
 787                     IndexType j1 = j;
 788                     IndexType j2 = j;
 789                     jNext = j + 1;
 790                     if (j<n) {
 791                         if (T(j+1,j)!=Zero) {
 792                             j2 = j + 1;
 793                             jNext = j + 2;
 794                         }
 795                     }
 796 
 797                     if (j1==j2) {
 798 //
 799 //                      1-by-1 diagonal block
 800 //
 801 //                      Scale if necessary to avoid overflow when
 802 //                      forming the right-hand side elements.
 803 //
 804                         if (work(j)>vCrit) {
 805                             const ElementType rec = One / vMax;
 806                             work(_(ki+n,n2))    *= rec;
 807                             work(_(ki+n2,n2+n)) *= rec;
 808                             vMax = One;
 809                             vCrit = bigNum;
 810                         }
 811 
 812                         work(j+n)  -= T(_(ki+2,j-1),j)*work(_(ki+2+n,n+j-1));
 813                         work(j+n2) -= T(_(ki+2,j-1),j)*work(_(ki+2+n2,n2+j-1));
 814 //
 815 //                      Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2
 816 //
 817                         laln2(true,
 818                               IndexType(2),
 819                               safeMin,
 820                               One,
 821                               T(_(j,j),_(j,j)),
 822                               One,
 823                               One,
 824                               Work(_(j,j), _(2,3)),
 825                               wr,
 826                               -wi,
 827                               X(_(1,1),_(1,2)),
 828                               scale,
 829                               xNorm);
 830 
 831 //
 832 //                      Scale if necessary
 833 //
 834                         if (scale!=One) {
 835                             work(_(ki+n,n2))    *= scale;
 836                             work(_(ki+n2,n2+n)) *= scale;
 837                         }
 838                         work(j+n) =  X(1,1);
 839                         work(j+n2) = X(1,2);
 840                         vMax = max(abs(work(j+n)), abs(work(j+n2)), vMax);
 841                         vCrit = bigNum / vMax;
 842 
 843                     } else {
 844 //
 845 //                      2-by-2 diagonal block
 846 //
 847 //                      Scale if necessary to avoid overflow when forming
 848 //                      the right-hand side elements.
 849 //
 850                         const ElementType beta = max(work(j), work(j+1));
 851                         if (beta>vCrit) {
 852                             const ElementType rec = One / vMax;
 853                             work(_(ki+n,n2))    *= rec;
 854                             work(_(ki+n2,n2+n)) *= rec;
 855                             vMax = One;
 856                             vCrit = bigNum;
 857                         }
 858 
 859                         const auto _work1 = work(_(ki+2+n,n+j-1));
 860                         const auto _work2 = work(_(ki+2+n2,n2+j-1));
 861 
 862                         work(j+n)   -= T(_(ki+2,j-1),j)   * _work1;
 863                         work(j+n2)  -= T(_(ki+2,j-1),j)   * _work2;
 864                         work(j+1+n) -= T(_(ki+2,j-1),j+1) * _work1;
 865                         work(j+1+n2)-= T(_(ki+2,j-1),j+1) * _work2;
 866 //
 867 //                      Solve 2-by-2 complex linear equation
 868 //                      ([T(j,j)   T(j,j+1)  ]**T-(wr-i*wi)*I)*X = SCALE*B
 869 //                      ([T(j+1,j) T(j+1,j+1)]               )
 870 //
 871                         laln2(true,
 872                               IndexType(2),
 873                               safeMin,
 874                               One,
 875                               T(_(j,j+1),_(j,j+1)),
 876                               One,
 877                               One,
 878                               Work(_(j,j+1), _(2,3)),
 879                               wr,
 880                               -wi,
 881                               X(_(1,2),_(1,2)),
 882                               scale,
 883                               xNorm);
 884 //
 885 //                      Scale if necessary
 886 //
 887                         if (scale!=One) {
 888                             work(_(ki+n,n2))    *= scale;
 889                             work(_(ki+n2,n2+n)) *= scale;
 890                         }
 891                         work(j+n)    = X(1,1);
 892                         work(j+n2)   = X(1,2);
 893                         work(j+1+n)  = X(2,1);
 894                         work(j+1+n2) = X(2,2);
 895                         vMax = max(abs(X(1,1)), abs(X(1,2)),
 896                                    abs(X(2,1)), abs(X(2,2)),
 897                                    vMax);
 898                         vCrit = bigNum / vMax;
 899 
 900                     }
 901                 }
 902 //
 903 //              Copy the vector x or Q*x to VL and normalize.
 904 //
 905                 if (!over) {
 906                     VL(_(ki,n),is)   = work(_(ki+n,n2));
 907                     VL(_(ki,n),is+1) = work(_(ki+n2,n2+n));
 908 
 909                     ElementType eMax = Zero;
 910                     for (IndexType k=ki; k<=n; ++k) {
 911                         eMax = max(eMax, abs(VL(k,is)) + abs(VL(k,is+1)));
 912                     }
 913                     const ElementType reMax = One / eMax;
 914                     VL(_(ki,n),is)   *= reMax;
 915                     VL(_(ki,n),is+1) *= reMax;
 916 
 917                     VL(_(1,ki-1),is)   = Zero;
 918                     VL(_(1,ki-1),is+1) = Zero;
 919                 } else {
 920                     if (ki<n-1) {
 921                         blas::mv(NoTrans, One,
 922                                  VL(_,_(ki+2,n)),
 923                                  work(_(ki+2+n,n2)),
 924                                  work(ki+n),
 925                                  VL(_,ki));
 926                         blas::mv(NoTrans, One,
 927                                  VL(_,_(ki+2,n)),
 928                                  work(_(ki+2+n2,n2+n)),
 929                                  work(ki+1+n2),
 930                                  VL(_,ki+1));
 931                     } else {
 932                         VL(_,ki)    *= work(ki+n);
 933                         VL(_,ki+1)  *= work(ki+1+n2);
 934                     }
 935 
 936                     ElementType eMax = Zero;
 937                     for (IndexType k=1; k<=n; ++k) {
 938                         eMax = max(eMax, abs(VL(k,ki)) + abs(VL(k,ki+1)));
 939                     }
 940                     const ElementType reMax = One / eMax;
 941                     VL(_,ki)   *= reMax;
 942                     VL(_,ki+1) *= reMax;
 943                 }
 944             }
 945 
 946             ++is;
 947             if (ip!=0) {
 948                 ++is;
 949             }
 950             if (ip==-1) {
 951                 ip = 0;
 952             }
 953             if (ip==1) {
 954                 ip = -1;
 955             }
 956         }
 957     }
 958 }
 959 
 960 //== interface for native lapack ===============================================
 961 
 962 #ifdef CHECK_CXXLAPACK
 963 
 964 template <typename VSELECT, typename MT, typename MVL, typename MVR,
 965           typename IndexType, typename VWORK>
 966 void
 967 trevc_native(bool                           computeVL,
 968              bool                           computeVR,
 969              TREVC::Job                     howMany,
 970              DenseVector<VSELECT>           &select,
 971              const GeMatrix<MT>             &T,
 972              GeMatrix<MVL>                  &VL,
 973              GeMatrix<MVR>                  &VR,
 974              IndexType                      mm,
 975              IndexType                      &m,
 976              DenseVector<VWORK>             &work)
 977 {
 978     typedef typename GeMatrix<MT>::ElementType     ElementType;
 979 
 980     char             SIDE;
 981     if (computeVL && computeVR) {
 982         SIDE = 'B';
 983     } else if (computeVL) {
 984         SIDE = 'L';
 985     } else if (computeVR) {
 986         SIDE = 'R';
 987     }
 988 
 989     const char       HOWMNY = getF77LapackChar<TREVC::Job>(howMany);
 990     LOGICAL          *SELECT = 0;
 991     const INTEGER    N = T.numRows();
 992     const INTEGER    LDT = T.leadingDimension();
 993     const INTEGER    LDVL = VL.leadingDimension();
 994     const INTEGER    LDVR = VR.leadingDimension();
 995     const INTEGER    MM = mm;
 996     INTEGER          M = m;
 997     INTEGER          INFO;
 998 
 999     if (howMany==TREVC::Selected) {
1000         SELECT = new LOGICAL[N];
1001         for (INTEGER i=1; i<=N; ++i) {
1002             SELECT[i] = select(i);
1003         }
1004     }
1005 
1006     if (IsSame<ElementType,DOUBLE>::value) {
1007         LAPACK_IMPL(dtrevc)(&SIDE,
1008                             &HOWMNY,
1009                             SELECT,
1010                             &N,
1011                             T.data(),
1012                             &LDT,
1013                             VL.data(),
1014                             &LDVL,
1015                             VR.data(),
1016                             &LDVR,
1017                             &MM,
1018                             &M,
1019                             work.data(),
1020                             &INFO);
1021     } else {
1022         ASSERT(0);
1023     }
1024 
1025     if (howMany==TREVC::Selected) {
1026         ASSERT(SELECT);
1027         delete [] SELECT;
1028     }
1029 
1030     ASSERT(INFO>=0);
1031 }
1032 
1033 #endif // CHECK_CXXLAPACK
1034 
1035 //== public interface ==========================================================
1036 
1037 template <typename VSELECT, typename MT, typename MVL, typename MVR,
1038           typename IndexType, typename VWORK>
1039 void
1040 trevc(bool                          computeVL,
1041       bool                          computeVR,
1042       TREVC::Job                    howMany,
1043       DenseVector<VSELECT>          &select,
1044       const GeMatrix<MT>            &T,
1045       GeMatrix<MVL>                 &VL,
1046       GeMatrix<MVR>                 &VR,
1047       IndexType                     mm,
1048       IndexType                     &m,
1049       DenseVector<VWORK>            &work)
1050 {
1051     LAPACK_DEBUG_OUT("BEGIN: trevc");
1052 
1053     typedef typename GeMatrix<MT>::ElementType ElementType;
1054 //
1055 //  Test the input parameters
1056 //
1057     ASSERT(T.firstRow()==1);
1058     ASSERT(T.firstCol()==1);
1059     ASSERT(T.numRows()==T.numCols());
1060     ASSERT(VL.firstRow()==1);
1061     ASSERT(VL.firstCol()==1);
1062     ASSERT(VR.firstRow()==1);
1063     ASSERT(VR.firstCol()==1);
1064 #   ifdef CHECK_CXXLAPACK
1065 //
1066 //  Make copies of output arguments
1067 //
1068     typename DenseVector<VSELECT>::NoView   select_org = select;
1069     typename GeMatrix<MVL>::NoView          VL_org     = VL;
1070     typename GeMatrix<MVR>::NoView          VR_org     = VR;
1071     typename DenseVector<VWORK>::NoView     work_org   = work;
1072 #   endif
1073 
1074 //
1075 //  Call implementation
1076 //
1077     trevc_generic(computeVL, computeVR, howMany, select,
1078                   T, VL, VR, mm, m, work);
1079 
1080 #   ifdef CHECK_CXXLAPACK
1081 //
1082 //  Make copies of results computed by the generic implementation
1083 //
1084     typename DenseVector<VSELECT>::NoView   select_generic = select;
1085     typename GeMatrix<MVL>::NoView          VL_generic     = VL;
1086     typename GeMatrix<MVR>::NoView          VR_generic     = VR;
1087     typename DenseVector<VWORK>::NoView     work_generic   = work;
1088 //
1089 //  restore output arguments
1090 //
1091     select = select_org;
1092     VL     = VL_org;
1093     VR     = VR_org;
1094     work   = work_org;
1095 //
1096 //  Compare results
1097 //
1098     trevc_native(computeVL, computeVR, howMany, select,
1099                  T, VL, VR, mm, m, work);
1100 
1101     bool failed = false;
1102     if (! isIdentical(VL_generic, VL, "VL_generic""VL")) {
1103         std::cerr << "CXXLAPACK: VL_generic = " << VL_generic << std::endl;
1104         std::cerr << "F77LAPACK: VL = " << VL << std::endl;
1105         failed = true;
1106     }
1107 
1108     if (! isIdentical(VR_generic, VR, "VR_generic""_VR")) {
1109         std::cerr << "CXXLAPACK: VR_generic = " << VR_generic << std::endl;
1110         std::cerr << "F77LAPACK: VR = " << VR << std::endl;
1111         failed = true;
1112     }
1113 
1114     if (! isIdentical(work_generic, work, "work_generic""work")) {
1115         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
1116         std::cerr << "F77LAPACK: work = " << work << std::endl;
1117         failed = true;
1118     }
1119 
1120     if (failed) {
1121         ASSERT(0);
1122     }
1123 #   endif
1124     LAPACK_DEBUG_OUT("END: trevc");
1125 }
1126 
1127 //-- forwarding ----------------------------------------------------------------
1128 template <typename VSELECT, typename MT, typename MVL, typename MVR,
1129           typename IndexType, typename VWORK>
1130 void
1131 trevc(bool                          computeVL,
1132       bool                          computeVR,
1133       TREVC::Job                    howMany,
1134       VSELECT                       &&select,
1135       const MT                      &T,
1136       MVL                           &&VL,
1137       MVR                           &&VR,
1138       IndexType                     &&m,
1139       VWORK                         &&work)
1140 {
1141     CHECKPOINT_ENTER;
1142     trevc(computeVL, computeVR, howMany, select, T, VL, VR, m, work);
1143     CHECKPOINT_LEAVE;
1144 }
1145 
1146 } } // namespace lapack, flens
1147 
1148 #endif // FLENS_LAPACK_EIG_TREVC_TCC