1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34       SUBROUTINE DLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI,
 35      $                   ILOZ, IHIZ, Z, LDZ, INFO )
 36  *
 37  *  -- LAPACK auxiliary routine (version 3.2) --
 38  *     Univ. of Tennessee, Univ. of California Berkeley,
 39  *     Univ. of Colorado Denver and NAG Ltd..
 40  *     November 2006
 41  */
 42 
 43 #ifndef FLENS_LAPACK_EIG_LAHQR_TCC
 44 #define FLENS_LAPACK_EIG_LAHQR_TCC 1
 45 
 46 #include <cmath>
 47 #include <flens/blas/blas.h>
 48 #include <flens/lapack/lapack.h>
 49 
 50 namespace flens { namespace lapack {
 51 
 52 //== generic lapack implementation =============================================
 53 
 54 template <typename IndexType, typename MH, typename VWR, typename VWI,
 55           typename MZ>
 56 IndexType
 57 lahqr_generic(bool                  wantT,
 58               bool                  wantZ,
 59               IndexType             iLo,
 60               IndexType             iHi,
 61               GeMatrix<MH>          &H,
 62               DenseVector<VWR>      &wr,
 63               DenseVector<VWI>      &wi,
 64               IndexType             iLoZ,
 65               IndexType             iHiZ,
 66               GeMatrix<MZ>          &Z)
 67 {
 68     using std::abs;
 69     using std::max;
 70     using std::min;
 71 
 72     typedef typename GeMatrix<MH>::ElementType  T;
 73 
 74     const Underscore<IndexType>     _;
 75     const T                         Zero(0), One(1), Two(2);
 76     const T                         Dat1 = T(3)/T(4),
 77                                     Dat2 = T(-0.4375);
 78     const IndexType                 itMax = 30;
 79     const IndexType                 n = H.numRows();
 80 
 81     typedef typename GeMatrix<MH>::VectorView   VectorView;
 82     T           vBuffer[3];
 83     VectorView  v = typename VectorView::Engine(3, vBuffer);
 84 
 85 //
 86 //  Quick return if possible
 87 //
 88     if (n==0) {
 89         return 0;
 90     }
 91     if (iLo==iHi) {
 92         wr(iLo) = H(iLo, iLo);
 93         wi(iLo) = Zero;
 94         return 0;
 95     }
 96 //
 97 //  ==== clear out the trash ====
 98 //
 99     for (IndexType j=iLo; j<=iHi-3; ++j) {
100         H(j+2, j) = Zero;
101         H(j+3, j) = Zero;
102     }
103     if (iLo<=iHi-2) {
104         H(iHi, iHi-2) = Zero;
105     }
106 
107     const IndexType nh = iHi - iLo + 1;
108 //
109 //  Set machine-dependent constants for the stopping criterion.
110 //
111     T safeMin = lamch<T>(SafeMin);
112     T safeMax = One / safeMin;
113     labad(safeMin, safeMax);
114     const T ulp = lamch<T>(Precision);
115     const T smallNum = safeMin*(T(nh)/ulp);
116 //
117 //  i1 and i2 are the indices of the first row and last column of H
118 //  to which transformations must be applied. If eigenvalues only are
119 //  being computed, i1 and i2 are set inside the main loop.
120 //
121     IndexType i1 = -1, i2 = -1;
122     if (wantT) {
123         i1 = 1;
124         i2 = n;
125     }
126 //
127 //  The main loop begins here. Variable i is the loop index and decreases from
128 //  iHi to iLo in steps of 1 or 2. Each iteration of the loop works
129 //  with the active submatrix in rows and columns l to i.
130 //  Eigenvalues i+1 to iHi have already converged. Either l = iLo or
131 //  H(l,l-1) is negligible so that the matrix splits.
132 //
133     IndexType i = iHi;
134     while (true) {
135         IndexType l = iLo;
136         if (i<iLo) {
137             break;
138         }
139 //
140 //      Perform QR iterations on rows and columns iLo to i until a
141 //      submatrix of order 1 or 2 splits off at the bottom because a
142 //      subdiagonal element has become negligible.
143 //
144         IndexType its;
145         for (its=0; its<=itMax; ++its) {
146 //
147 //          Look for a single small subdiagonal element.
148 //
149             IndexType k;
150             for (k=i; k>=l+1; --k) {
151                 if (abs(H(k,k-1))<=smallNum) {
152                     break;
153                 }
154                 T test = abs(H(k-1,k-1)) + abs(H(k,k));
155                 if (test==Zero) {
156                     if (k-2>=iLo) {
157                         test += abs(H(k-1,k-2));
158                     }
159                     if (k+1<=iHi) {
160                         test += abs(H(k+1,k));
161                     }
162                 }
163 //              ==== The following is a conservative small subdiagonal
164 //              .    deflation  criterion due to Ahues & Tisseur (LAWN 122,
165 //              .    1997). It has better mathematical foundation and
166 //              .    improves accuracy in some cases.  ====
167                 if (abs(H(k,k-1))<=ulp*test) {
168                     const T ab = max(abs(H(k, k-1)), abs(H(k-1, k)));
169                     const T ba = min(abs(H(k, k-1)), abs(H(k-1, k)));
170                     const T aa = max(abs(H(k,   k)), abs(H(k-1, k-1)-H(k, k)));
171                     const T bb = min(abs(H(k,   k)), abs(H(k-1, k-1)-H(k, k)));
172                     const T s = aa + ab;
173                     if (ba*(ab/s)<=max(smallNum, ulp*(bb*(aa/s)))) {
174                         break;
175                     }
176                 }
177             }
178             l = k;
179 
180             if (l>iLo) {
181 //
182 //              H(l,l-1) is negligible
183 //
184                 H(l, l-1) = Zero;
185             }
186 //
187 //          Exit from loop if a submatrix of order 1 or 2 has split off.
188 //
189             if (l>=i-1) {
190                 break;
191             }
192 //
193 //          Now the active submatrix is in rows and columns l to i. If
194 //          eigenvalues only are being computed, only the active submatrix
195 //          need be transformed.
196 //
197             if (!wantT) {
198                 i1 = l;
199                 i2 = i;
200             }
201 
202             T H11, H12, H21, H22;
203             T rt1r, rt2r, rt1i, rt2i;
204             if (its==10) {
205 //
206 //              Exceptional shift.
207 //
208                 const T s = abs(H(l+1,l)) + abs(H(l+2, l+1));
209                 H11 = Dat1*s + H(l,l);
210                 H12 = Dat2*s;
211                 H21 = s;
212                 H22 = H11;
213             } else if (its==20) {
214 //
215 //              Exceptional shift.
216 //
217                 const T s = abs(H(i,i-1)) + abs(H(i-1,i-2));
218                 H11 = Dat1*s + H(i,i);
219                 H12 = Dat2*s;
220                 H21 = s;
221                 H22 = H11;
222             } else {
223 //
224 //              Prepare to use Francis' double shift
225 //              (i.e. 2nd degree generalized Rayleigh quotient)
226 //
227                 H11 = H(i-1, i-1);
228                 H21 = H(  i, i-1);
229                 H12 = H(i-1,   i);
230                 H22 = H(  i,   i);
231             }
232 
233             const T s = abs(H11) + abs(H12) + abs(H21) + abs(H22);
234             if (s==Zero) {
235                 rt1r = Zero;
236                 rt1i = Zero;
237                 rt2r = Zero;
238                 rt2i = Zero;
239             } else {
240                 H11 /= s;
241                 H21 /= s;
242                 H12 /= s;
243                 H22 /= s;
244                 const T tr = (H11+H22) / Two;
245                 const T det = (H11-tr)*(H22-tr) - H12*H21;
246                 const T rtDisc = sqrt(abs(det));
247                 if (det>=Zero) {
248 //
249 //                  ==== complex conjugate shifts ====
250 //
251                     rt1r = tr*s;
252                     rt2r = rt1r;
253                     rt1i = rtDisc*s;
254                     rt2i = -rt1i;
255                 } else {
256 //
257 //                  ==== real shifts (use only one of them)  ====
258 //
259                     rt1r = tr + rtDisc;
260                     rt2r = tr - rtDisc;
261                     if (abs(rt1r-H22)<=abs(rt2r-H22)) {
262                         rt1r *= s;
263                         rt2r = rt1r;
264                     } else {
265                         rt2r *= s;
266                         rt1r = rt2r;
267                     }
268                     rt1i = Zero;
269                     rt2i = Zero;
270                 }
271             }
272 //
273 //          Look for two consecutive small subdiagonal elements.
274 //
275             IndexType m;
276             for (m=i-2; m>=l; --m) {
277 //              Determine the effect of starting the double-shift QR
278 //              iteration at row m, and see if this would make H(m,m-1)
279 //              negligible.  (The following uses scaling to avoid
280 //              overflows and most underflows.)
281 //
282                 T H21S = H(m+1,m);
283                 T s = abs(H(m,m)-rt2r) + abs(rt2i) + abs(H21S);
284 
285                 H21S = H(m+1,m)/s;
286                 v(1) = H21S*H(m,m+1)
287                      + (H(m,m)-rt1r)*((H(m,m)-rt2r)/s)
288                      - rt1i*(rt2i/s);
289                 v(2) = H21S*(H(m,m) + H(m+1,m+1)-rt1r-rt2r);
290                 v(3) = H21S*H(m+2,m+1);
291 
292                 s = abs(v(1)) + abs(v(2)) + abs(v(3));
293                 v(1) /= s;
294                 v(2) /= s;
295                 v(3) /= s;
296 
297                 if (m==l) {
298                     break;
299                 }
300 
301                 const T value1 = abs(H(m,m-1))*(abs(v(2))+abs(v(3)));
302                 const T value2 = ulp*abs(v(1))
303                                  *(abs(H(m-1,m-1))+abs(H(m,m))+abs(H(m+1,m+1)));
304                 if (value1<=value2) {
305                     break;
306                 }
307             }
308 //
309 //          Double-shift QR step
310 //
311             for (k=m; k<=i-1; ++k) {
312 //
313 //              The first iteration of this loop determines a reflection G
314 //              from the vector v and applies it from left and right to H,
315 //              thus creating a nonzero bulge below the subdiagonal.
316 //
317 //              Each subsequent iteration determines a reflection G to
318 //              restore the Hessenberg form in the (k-1)th column, and thus
319 //              chases the bulge one step toward the bottom of the active
320 //              submatrix. 'nr' is the order of G.
321 //
322                 T   t1, t2, t3, v2, v3;
323 
324                 IndexType nr = min(IndexType(3), i-k+1);
325                 if (k>m) {
326                     v(_(1,nr)) = H(_(k,k+nr-1),k-1);
327                 }
328                 larfg(nr, v(1), v(_(2,nr)), t1);
329                 if (k>m) {
330                     H(k,  k-1) = v(1);
331                     H(k+1,k-1) = Zero;
332                     if (k<i-1) {
333                         H(k+2,k-1) = Zero;
334                     }
335                 } else if (m>l) {
336 //                  ==== Use the following instead of
337 //                  .    H( K, K-1 ) = -H( K, K-1 ) to
338 //                  .    avoid a bug when v(2) and v(3)
339 //                  .    underflow. ====
340                     H(k, k-1) *= (One-t1);
341                 }
342                 v2 = v(2);
343                 t2 = t1*v2;
344                 if (nr==3) {
345                     v3 = v(3);
346                     t3 = t1*v3;
347 //
348 //                  Apply G from the left to transform the rows of the matrix
349 //                  in columns k to i2.
350 //
351                     for (IndexType j=k; j<=i2; ++j) {
352                         const T sum = H(k,j) + v2*H(k+1,j) + v3*H(k+2,j);
353                         H(k,   j) -= sum*t1;
354                         H(k+1, j) -= sum*t2;
355                         H(k+2, j) -= sum*t3;
356                     }
357 //
358 //                  Apply G from the right to transform the columns of the
359 //                  matrix in rows i1 to min(k+3,i).
360 //
361                     for (IndexType j=i1; j<=min(k+3,i); ++j) {
362                         const T sum = H(j, k) + v2*H(j,k+1) + v3*H(j,k+2);
363                         H(j, k  ) -= sum*t1;
364                         H(j, k+1) -= sum*t2;
365                         H(j, k+2) -= sum*t3;
366                     }
367 
368                     if (wantZ) {
369 //
370 //                      Accumulate transformations in the matrix Z
371 //
372                         for (IndexType j=iLoZ; j<=iHiZ; ++j) {
373                             const T sum = Z(j, k) + v2*Z(j, k+1) + v3*Z(j, k+2);
374                             Z(j, k  ) -= sum*t1;
375                             Z(j, k+1) -= sum*t2;
376                             Z(j, k+2) -= sum*t3;
377                         }
378                     }
379                 } else if (nr==2) {
380 //
381 //                  Apply G from the left to transform the rows of the matrix
382 //                  in columns K to I2.
383 //
384                     for (IndexType j=k; j<=i2; ++j) {
385                         const T sum = H(k, j) + v2*H(k+1, j);
386                         H(k,   j) -= sum*t1;
387                         H(k+1, j) -= sum*t2;
388                     }
389 //
390 //                  Apply G from the right to transform the columns of the
391 //                  matrix in rows i1 to min(k+3,i).
392 //
393                     for (IndexType j=i1; j<=i; ++j) {
394                         const T sum = H(j, k) + v2*H(j, k+1);
395                         H(j, k  ) -= sum*t1;
396                         H(j, k+1) -= sum*t2;
397                     }
398 
399                     if (wantZ) {
400 //
401 //                      Accumulate transformations in the matrix Z
402 //
403                         for (IndexType j=iLoZ; j<=iHiZ; ++j) {
404                             const T sum = Z(j, k) + v2*Z(j, k+1);
405                             Z(j, k  ) -= sum*t1;
406                             Z(j, k+1) -= sum*t2;
407                         }
408                     }
409                 }
410             }
411         }
412 //
413 //      Failure to converge in remaining number of iterations
414 //
415         if (its>itMax) {
416             return i;
417         }
418 
419         if (l==i) {
420 //
421 //          H(I,I-1) is negligible: one eigenvalue has converged.
422 //
423             wr(i) = H(i, i);
424             wi(i) = Zero;
425         } else if (l==i-1) {
426 //
427 //          H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
428 //
429 //          Transform the 2-by-2 submatrix to standard Schur form,
430 //          and compute and store the eigenvalues.
431 //
432             T cs, sn;
433             lanv2(H(i-1,i-1), H(i-1,i), H(i,i-1), H(i,i),
434                   wr(i-1), wi(i-1), wr(i), wi(i),
435                   cs, sn);
436 
437             if (wantT) {
438 //
439 //              Apply the transformation to the rest of H.
440 //
441                 if (i2>i) {
442                     const auto cols = _(i+1,i2);
443                     blas::rot(H(i-1,cols), H(i,cols), cs, sn);
444                 }
445                 const auto rows = _(i1,i-2);
446                 blas::rot(H(rows, i-1), H(rows, i), cs, sn);
447             }
448             if (wantZ) {
449 //
450 //              Apply the transformation to Z.
451 //
452                 const auto rows = _(iLoZ, iHiZ);
453                 blas::rot(Z(rows, i-1), Z(rows, i), cs, sn);
454             }
455         }
456 //
457 //      return to start of the main loop with new value of I.
458 //
459         i = l - 1;
460     }
461     return 0;
462 }
463 
464 //== interface for native lapack ===============================================
465 
466 #ifdef CHECK_CXXLAPACK
467 
468 template <typename IndexType, typename MH, typename VWR, typename VWI,
469           typename MZ>
470 IndexType
471 lahqr_native(bool                   wantT,
472              bool                   wantZ,
473              IndexType              iLo,
474              IndexType              iHi,
475              GeMatrix<MH>           &H,
476              DenseVector<VWR>       &wr,
477              DenseVector<VWI>       &wi,
478              IndexType              iLoZ,
479              IndexType              iHiZ,
480              GeMatrix<MZ>           &Z)
481 {
482     typedef typename GeMatrix<MH>::ElementType  T;
483 
484     const LOGICAL    WANTT  = wantT;
485     const LOGICAL    WANTZ  = wantZ;
486     const INTEGER    N      = H.numRows();
487     const INTEGER    ILO    = iLo;
488     const INTEGER    IHI    = iHi;
489     const INTEGER    LDH    = H.leadingDimension();
490     const INTEGER    ILOZ   = iLoZ;
491     const INTEGER    IHIZ   = iHiZ;
492     const INTEGER    LDZ    = Z.leadingDimension();
493     INTEGER          INFO;
494 
495     if (IsSame<T,DOUBLE>::value) {
496         LAPACK_IMPL(dlahqr)(&WANTT,
497                             &WANTZ,
498                             &N,
499                             &ILO,
500                             &IHI,
501                             H.data(),
502                             &LDH,
503                             wr.data(),
504                             wi.data(),
505                             &ILOZ,
506                             &IHIZ,
507                             Z.data(),
508                             &LDZ,
509                             &INFO);
510     } else {
511         ASSERT(0);
512     }
513     return INFO;
514 }
515 
516 #endif // CHECK_CXXLAPACK
517 
518 //== public interface ==========================================================
519 
520 template <typename IndexType, typename MH, typename VWR, typename VWI,
521           typename MZ>
522 IndexType
523 lahqr(bool                  wantT,
524       bool                  wantZ,
525       IndexType             iLo,
526       IndexType             iHi,
527       GeMatrix<MH>          &H,
528       DenseVector<VWR>      &wr,
529       DenseVector<VWI>      &wi,
530       IndexType             iLoZ,
531       IndexType             iHiZ,
532       GeMatrix<MZ>          &Z)
533 {
534     LAPACK_DEBUG_OUT("lahqr");
535 
536 //
537 //  Test the input parameters
538 //
539     using std::max;
540 
541     ASSERT(H.firstRow()==1);
542     ASSERT(H.firstCol()==1);
543     ASSERT(H.numRows()==H.numCols());
544     ASSERT(wr.firstIndex()==1);
545     ASSERT(wr.length()==H.numRows());
546     ASSERT(wi.firstIndex()==1);
547     ASSERT(wi.length()==H.numRows());
548     ASSERT(wantZ || (Z.numRows()==H.numCols()));
549     ASSERT(wantZ || (Z.numCols()==H.numCols()));
550 
551     // 1 <= ILO <= max(1,IHI); IHI <= N.
552     ASSERT(1<=iLo);
553     ASSERT(iLo<=max(IndexType(1), iHi));
554     ASSERT(iHi<=H.numRows());
555 
556     // 1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
557     ASSERT(1<=iLoZ);
558     ASSERT(iLoZ<=iLo);
559     ASSERT(iHi<=iHiZ);
560     ASSERT(iHiZ<=H.numRows());
561 
562 //
563 //  Make copies of output arguments
564 //
565 #   ifdef CHECK_CXXLAPACK
566     typename GeMatrix<MH>::NoView          H_org    = H;
567     typename GeMatrix<MH>::NoView          Z_org    = Z;
568 
569     typename GeMatrix<MH>::NoView          _H       = H;
570     typename DenseVector<VWR>::NoView      _wr      = wr;
571     typename DenseVector<VWI>::NoView      _wi      = wi;
572     typename GeMatrix<MZ>::NoView          _Z       = Z;
573 #   endif
574 
575 //
576 //  Call implementation
577 //
578     IndexType info = lahqr_generic(wantT, wantZ, iLo,  iHi,
579                                    H, wr, wi, iLoZ, iHiZ, Z);
580 
581 //
582 //  Compare results
583 //
584 #   ifdef CHECK_CXXLAPACK
585     IndexType _info = lahqr_native(wantT, wantZ, iLo,  iHi,
586                                    _H, _wr, _wi, iLoZ, iHiZ, _Z);
587 
588     bool failed = false;
589     if (! isIdentical(H, _H, " H""_H")) {
590         std::cerr << "CXXLAPACK:  H = " << H << std::endl;
591         std::cerr << "F77LAPACK: _H = " << _H << std::endl;
592         failed = true;
593     }
594 
595     if (! isIdentical(wr, _wr, " wr""_wr")) {
596         std::cerr << "CXXLAPACK:  wr = " << wr << std::endl;
597         std::cerr << "F77LAPACK: _wr = " << _wr << std::endl;
598         failed = true;
599     }
600 
601     if (! isIdentical(wi, _wi, " wi""_wi")) {
602         std::cerr << "CXXLAPACK:  wi = " << wi << std::endl;
603         std::cerr << "F77LAPACK: _wi = " << _wi << std::endl;
604         failed = true;
605     }
606 
607     if (! isIdentical(Z, _Z, " Z""_Z")) {
608         std::cerr << "CXXLAPACK:  Z = " << Z << std::endl;
609         std::cerr << "F77LAPACK: _Z = " << _Z << std::endl;
610         failed = true;
611     }
612 
613     if (! isIdentical(info, _info, " info""_info")) {
614         std::cerr << "CXXLAPACK:  info = " << info << std::endl;
615         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
616         failed = true;
617     }
618 
619     if (failed) {
620         std::cerr << "H_org = " << H_org << std::endl;
621         std::cerr << "Z_org = " << Z_org << std::endl;
622         std::cerr << "wantT = " << wantT
623                   << ", wantZ = " << wantZ
624                   << ", iLo = " << iLo
625                   << ", iHi = " << iHi
626                   << std::endl;
627         std::cerr << "error in: lahqr.tcc" << std::endl;
628         ASSERT(0);
629     } else {
630 //        std::cerr << "passed: lahqr.tcc" << std::endl;
631     }
632 #   endif
633     return info;
634 }
635 
636 //-- forwarding ----------------------------------------------------------------
637 template <typename IndexType, typename MH, typename VWR, typename VWI,
638           typename MZ>
639 IndexType
640 lahqr(bool          wantT,
641       bool          wantZ,
642       IndexType     iLo,
643       IndexType     iHi,
644       MH            &&H,
645       VWR           &&wr,
646       VWI           &&wi,
647       IndexType     iLoZ,
648       IndexType     iHiZ,
649       MZ            &&Z)
650 {
651     return lahqr(wantT, wantZ, iLo, iHi, H, wr, wi, iLoZ, iHiZ, Z);
652 }
653 
654 } } // namespace lapack, flens
655 
656 #endif // FLENS_LAPACK_EIG_LAHQR_TCC