1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35       SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
 36      $                   IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
 37      $                   LDT, NV, WV, LDWV, WORK, LWORK )
 38  *
 39  *  -- LAPACK auxiliary routine (version 3.2.2)                        --
 40  *     Univ. of Tennessee, Univ. of California Berkeley,
 41  *     Univ. of Colorado Denver and NAG Ltd..
 42  *  -- June 2010                                                       --
 43  *
 44  */
 45 
 46 #ifndef FLENS_LAPACK_EIG_LAQR2_TCC
 47 #define FLENS_LAPACK_EIG_LAQR2_TCC 1
 48 
 49 #include <flens/blas/blas.h>
 50 #include <flens/lapack/lapack.h>
 51 
 52 namespace flens { namespace lapack {
 53 
 54 //== generic lapack implementation =============================================
 55 
 56 template <typename IndexType, typename MT>
 57 IndexType
 58 laqr2_generic_wsq(IndexType                 kTop,
 59                   IndexType                 kBot,
 60                   IndexType                 nw,
 61                   const GeMatrix<MT>        &T)
 62 {
 63     using std::max;
 64     using std::min;
 65 
 66     typedef typename GeMatrix<MT>::ElementType  ElementType;
 67 
 68     const Underscore<IndexType> _;
 69     
 70 //
 71 //  ==== Estimate optimal workspace. ====
 72 //
 73     IndexType jw = min(nw, kBot-kTop+1);
 74     auto  _T  = T(_(1,jw),_(1,jw));
 75 
 76     IndexType lWorkOpt;
 77     if (jw<=2) {
 78         lWorkOpt = 1;
 79     } else {
 80 //
 81 //      ==== Workspace query call to DGEHRD ====
 82 //
 83         IndexType lWork1 = hrd_wsq(IndexType(1), jw-1, _T);
 84 //
 85 //      ==== Workspace query call to DORMHR ====
 86 //
 87         IndexType lWork2 = ormhr_wsq(Right, NoTrans, IndexType(1), jw-1, _T);
 88 //
 89 //      ==== Optimal workspace ====
 90 //
 91         lWorkOpt = jw+max(lWork1,lWork2);
 92     }
 93     return lWorkOpt;
 94 }
 95 
 96 template <typename IndexType, typename MH, typename MZ, typename VSR,
 97           typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
 98 void
 99 laqr2_generic(bool                      wantT,
100               bool                      wantZ,
101               IndexType                 kTop,
102               IndexType                 kBot,
103               IndexType                 nw,
104               GeMatrix<MH>              &H,
105               IndexType                 iLoZ,
106               IndexType                 iHiZ,
107               GeMatrix<MZ>              &Z,
108               IndexType                 &ns,
109               IndexType                 &nd,
110               DenseVector<VSR>          &sr,
111               DenseVector<VSI>          &si,
112               GeMatrix<MV>              &V,
113               GeMatrix<MT>              &T,
114               GeMatrix<MWV>             &WV,
115               DenseVector<VWORK>        &work)
116 {
117     using std::abs;
118     using std::max;
119     using std::min;
120 
121     typedef typename GeMatrix<MH>::ElementType  ElementType;
122 
123     const Underscore<IndexType> _;
124 
125     const ElementType   Zero(0), One(1);
126     const IndexType     n   = H.numRows();
127     const IndexType     nh  = T.numCols();
128     const IndexType     nv  = WV.numRows();
129 //
130 //  ==== Estimate optimal workspace. ====
131 //
132     IndexType   jw      = min(nw, kBot-kTop+1);
133     auto        T_jw    = T(_(1,jw),_(1,jw));
134     auto        V_jw    = V(_(1,jw),_(1,jw));
135 
136     IndexType lWorkOpt;
137     if (jw<=2) {
138         lWorkOpt = 1;
139     } else {
140 //
141 //      ==== Workspace query call to DGEHRD ====
142 //
143         IndexType lWork1 = hrd_wsq(IndexType(1), jw-1, T_jw);
144 //
145 //      ==== Workspace query call to DORMHR ====
146 //
147         IndexType lWork2 = ormhr_wsq(Right, NoTrans, IndexType(1), jw-1, V_jw);
148 //
149 //      ==== Optimal workspace ====
150 //
151         lWorkOpt = jw+max(lWork1,lWork2);
152     }
153 //
154 //  ==== Apply worksize query ====
155 //
156     if (work.length()==0) {
157         work.resize(lWorkOpt);
158     }
159     const IndexType lWork = work.length();
160 //
161 //  ==== Nothing to do ...
162 //  ... for an empty active block ... ====
163     ns = 0;
164     nd = 0;
165     work(1) = One;
166     if (kTop>kBot) {
167         return;
168     }
169 //   ... nor for an empty deflation window. ====
170     if (nw<1) {
171         return;
172     }
173 //
174 //  ==== Machine constants ====
175 //
176     ElementType safeMin   = lamch<ElementType>(SafeMin);
177     ElementType safeMax   = One / safeMin;
178     labad(safeMin, safeMax);
179     const ElementType ulp      = lamch<ElementType>(Precision);
180     const ElementType smallNum = safeMin*(ElementType(n)/ulp);
181 //
182 //  ==== Setup deflation window ====
183 //
184     IndexType   kwTop   = kBot - jw + 1;
185     auto        sr_kw   = sr(_(kwTop,kBot));
186     auto        si_kw   = si(_(kwTop,kBot));
187     auto        H_kw    = H(_(kwTop,kBot),_(kwTop,kBot));
188     ElementType s;
189 
190     if( kwTop==kTop) {
191         s = Zero;
192     } else {
193         s = H(kwTop, kwTop-1);
194     }
195 
196     if (kBot==kwTop) {
197 //
198 //      ==== 1-by-1 deflation window: not much to do ====
199 //
200         sr(kwTop) = H(kwTop,kwTop);
201         si(kwTop) = Zero;
202         ns = 1;
203         nd = 0;
204         if (abs(s)<=max(smallNum,ulp*abs(H(kwTop,kwTop)))) {
205             ns = 0;
206             nd = 1;
207             if (kwTop>kTop) {
208                 H(kwTop, kwTop-1) = Zero;
209             }
210         }
211         work(1) = One;
212         return;
213     }
214 //
215 //  ==== Convert to spike-triangular form.  (In case of a
216 //  .    rare QR failure, this routine continues to do
217 //  .    aggressive early deflation using that part of
218 //  .    the deflation window that converged using INFQR
219 //  .    here and there to keep track.) ====
220 //
221     T_jw.upper()  = H_kw.upper();
222     T_jw.diag(-1) = H_kw.diag(-1);
223 
224     V_jw          = Zero;
225     V_jw.diag(0)  = One;
226 
227     IndexType infoQR;
228     infoQR = lahqr(truetrue,
229                    IndexType(1), jw, T_jw,
230                    sr_kw, si_kw,
231                    IndexType(1), jw, V_jw);
232 //
233 //  ==== DTREXC needs a clean margin near the diagonal ====
234 //
235     for (IndexType j=1; j<=jw-3; ++j) {
236         T(j+2, j) = Zero;
237         T(j+3, j) = Zero;
238     }
239     if (jw>2) {
240         T(jw,jw-2) = Zero;
241     }
242 //
243 //  ==== Deflation detection loop ====
244 //
245     ns = jw;
246     IndexType iFirst;
247     IndexType iLast = infoQR + 1;
248     while (iLast<=ns) {
249         bool bulge;
250         if (ns==1) {
251             bulge = false;
252         } else {
253             bulge = T(ns,ns-1)!=Zero;
254         }
255 //
256 //      ==== Small spike tip test for deflation ====
257 //
258         if (!bulge) {
259 //
260 //          ==== Real eigenvalue ====
261 //
262             ElementType foo = abs(T(ns,ns));
263             if (foo==Zero) {
264                 foo = abs(s);
265             }
266             if (abs(s*V(1,ns))<=max(smallNum,ulp*foo)) {
267 //
268 //               ==== Deflatable ====
269 //
270                 ns = ns - 1;
271             } else {
272 //
273 //               ==== Undeflatable.   Move it up out of the way.
274 //               .    (DTREXC can not fail in this case.) ====
275 //
276                 iFirst = ns;
277                 trexc(true, T_jw, V_jw, iFirst, iLast, work(_(1,jw)));
278                 ++iLast;
279             }
280         } else {
281 //
282 //          ==== Complex conjugate pair ====
283 //
284             ElementType foo = abs(T(ns,ns))
285                             + sqrt(abs(T(ns,ns-1)))*sqrt(abs(T(ns-1,ns)));
286             if (foo==Zero) {
287                 foo = abs(s);
288             }
289             if (max(abs(s*V(1,ns)), abs(s*V(1,ns-1)))<=max(smallNum,ulp*foo)) {
290 //
291 //              ==== Deflatable ====
292 //
293                 ns -= 2;
294             } else {
295 //
296 //              ==== Undeflatable. Move them up out of the way.
297 //              .    Fortunately, DTREXC does the right thing with
298 //              .    ILST in case of a rare exchange failure. ====
299 //
300                 iFirst = ns;
301                 trexc(true, T_jw, V_jw, iFirst, iLast, work(_(1,jw)));
302                 iLast += 2;
303             }
304         }
305 //
306 //      ==== End deflation detection loop ====
307 //
308     }
309 //
310 //     ==== Return to Hessenberg form ====
311 //
312     if (ns==0) {
313         s = Zero;
314     }
315 
316     if (ns<jw) {
317 //
318 //     ==== sorting diagonal blocks of T improves accuracy for
319 //     .    graded matrices.  Bubble sort deals well with
320 //     .    exchange failures. ====
321 //
322         bool sorted = false;
323         IndexType i = ns + 1;
324         while (!sorted) {
325             sorted = true;
326 
327             IndexType kEnd = i - 1;
328             i = infoQR + 1;
329 
330             IndexType k;
331             if (i==ns) {
332                 k = i + 1;
333             } else if (T(i+1,i)==Zero) {
334                 k = i + 1;
335             } else {
336                 k = i + 2;
337             }
338             while (k<=kEnd) {
339                 ElementType evi, evk;
340 
341                 if (k==i+1) {
342                     evi = abs(T(i,i));
343                 } else {
344                     evi = abs(T(i,i)) + sqrt(abs(T(i+1,i)))*sqrt(abs(T(i,i+1)));
345                 }
346 
347                 if (k==kEnd) {
348                     evk = abs(T(k,k));
349                 } else if(T(k+1,k)==Zero) {
350                     evk = abs(T(k,k));
351                 } else {
352                     evk = abs(T(k,k)) + sqrt(abs(T(k+1,k)))*sqrt(abs(T(k,k+1)));
353                 }
354 
355                 if (evi>=evk) {
356                     i = k;
357                 } else {
358                     sorted = false;
359                     iFirst = i;
360                     iLast = k;
361                     IndexType info = trexc(true, T_jw, V_jw, iFirst, iLast,
362                                            work(_(1,jw)));
363                     if (info==0) {
364                         i = iLast;
365                     } else {
366                         i = k;
367                     }
368                 }
369                 if (i==kEnd) {
370                     k = i + 1;
371                 } else if (T(i+1,i)==Zero) {
372                     k = i + 1;
373                 } else {
374                     k = i + 2;
375                 }
376             }
377         }
378     }
379 //
380 //  ==== Restore shift/eigenvalue array from T ====
381 //
382     IndexType i = jw;
383     while (i>=infoQR+1) {
384         if (i==infoQR+1) {
385             sr(kwTop+i-1) = T(i,i);
386             si(kwTop+i-1) = Zero;
387             i = i - 1;
388         } else if (T(i,i-1)==Zero) {
389             sr(kwTop+i-1 ) = T(i,i);
390             si(kwTop+i-1 ) = Zero;
391             i = i - 1;
392         } else {
393             ElementType aa = T(i-1,i-1);
394             ElementType cc = T(i,  i-1);
395             ElementType bb = T(i-1,i  );
396             ElementType dd = T(i,  i  );
397             ElementType cs, sn;
398             lanv2(aa, bb, cc, dd,
399                   sr(kwTop+i-2), si(kwTop+i-2),
400                   sr(kwTop+i-1), si(kwTop+i-1),
401                   cs, sn);
402             i -= 2;
403         }
404     }
405 
406     if (ns<jw || s==Zero) {
407         
408         if (ns>1 && s!=Zero) {
409 //
410 //          ==== Reflect spike back into lower triangle ====
411 //
412             work(_(1,ns)) = V(1,_(1,ns));
413             ElementType beta = work(1);
414             ElementType tau;
415 
416             larfg(ns, beta, work(_(2,ns)), tau);
417             work(1) = One;
418 
419             T(_(3,jw),_(1,jw-2)).lower() = Zero;
420 
421             const auto _v = work(_(1,ns));
422 
423             larf(Left,  _v, tau, T(_(1,ns),_(1,jw)), work(_(jw+1,jw+jw)));
424             larf(Right, _v, tau, T(_(1,ns),_(1,ns)), work(_(jw+1,jw+ns)));
425             larf(Right, _v, tau, V(_(1,jw),_(1,ns)), work(_(jw+1,jw+jw)));
426 
427             hrd(IndexType(1), ns, T_jw, work(_(1,jw-1)), work(_(jw+1,lWork)));
428         }
429 //
430 //      ==== Copy updated reduced window into place ====
431 //
432         if (kwTop>1) {
433             H(kwTop,kwTop-1) = s*V(1,1);
434         }
435         H_kw.upper()  = T_jw.upper();
436         H_kw.diag(-1) = T_jw.diag(-1);
437 //
438 //      ==== Accumulate orthogonal matrix in order update
439 //      .    H and Z, if requested.  ====
440 //
441         if (ns>1 && s!=Zero) {
442             ormhr(Right, NoTrans,
443                   IndexType(1), ns,
444                   T(_(1,ns),_(1,ns)),
445                   work(_(1,ns-1)),
446                   V(_(1,jw),_(1,ns)),
447                   work(_(jw+1,lWork)));
448         }
449 //
450 //      ==== Update vertical slab in H ====
451 //
452         const IndexType lTop = (wantT) ? 1 : kTop;
453 
454         for (IndexType kRow=lTop; kRow<=kwTop-1; kRow+=nv) {
455             const IndexType kLn = min(nv,kwTop-kRow);
456             blas::mm(NoTrans, NoTrans, One,
457                      H(_(kRow,kRow+kLn-1),_(kwTop,kBot)),
458                      V(_(1,jw),_(1,jw)),
459                      Zero,
460                      WV(_(1,kLn),_(1,jw)));
461             H(_(kRow,kRow+kLn-1),_(kwTop,kBot)) = WV(_(1,kLn),_(1,jw));
462         }
463 //
464 //      ==== Update horizontal slab in H ====
465 //
466         if (wantT) {
467             for (IndexType kCol=kBot+1; kCol<=n; kCol+=nh) {
468                 const IndexType kLn = min(nh,n-kCol+1);
469                 blas::mm(ConjTrans, NoTrans, One,
470                          V(_(1,jw),_(1,jw)),
471                          H(_(kwTop,kBot),_(kCol,kCol+kLn-1)),
472                          Zero,
473                          T(_(1,jw),_(1,kLn)));
474                 H(_(kwTop,kBot),_(kCol,kCol+kLn-1)) = T(_(1,jw),_(1,kLn));
475             }
476         }
477 //
478 //      ==== Update vertical slab in Z ====
479 //
480         if (wantZ) {
481             for (IndexType kRow=iLoZ; kRow<=iHiZ; kRow+=nv) {
482                 const IndexType kLn = min(nv,iHiZ-kRow+1);
483                 blas::mm(NoTrans, NoTrans, One,
484                          Z(_(kRow,kRow+kLn-1),_(kwTop,kBot)),
485                          V(_(1,jw),_(1,jw)),
486                          Zero,
487                          WV(_(1,kLn),_(1,jw)));
488                 Z(_(kRow,kRow+kLn-1), _(kwTop,kBot)) = WV(_(1,kLn),_(1,jw));
489             }
490         }
491     }
492 //
493 //  ==== Return the number of deflations ... ====
494 //
495     nd = jw - ns;
496 //
497 //  ==== ... and the number of shifts. (Subtracting
498 //  .    INFQR from the spike length takes care
499 //  .    of the case of a rare QR failure while
500 //  .    calculating eigenvalues of the deflation
501 //  .    window.)  ====
502 //
503     ns = ns - infoQR;
504 //
505 //   ==== Return optimal workspace. ====
506 //
507     work(1) = lWorkOpt;
508 }
509 
510 //== interface for native lapack ===============================================
511 
512 #ifdef CHECK_CXXLAPACK
513 
514 template <typename IndexType, typename MT>
515 IndexType
516 laqr2_native_wsq(IndexType                 kTop,
517                  IndexType                 kBot,
518                  IndexType                 nw,
519                  const GeMatrix<MT>        &T)
520 {
521     typedef typename GeMatrix<MT>::ElementType  ElementType;
522 
523     const LOGICAL    WANTT  = false;
524     const LOGICAL    WANTZ  = false;
525     const INTEGER    N      = 1;
526     const INTEGER    KTOP   = kTop;
527     const INTEGER    KBOT   = kBot;
528     const INTEGER    NW     = nw;
529     const INTEGER    LDH    = 1;
530     const INTEGER    ILOZ   = 0;
531     const INTEGER    IHIZ   = 0;
532     const INTEGER    LDZ    = 1;
533     INTEGER          NS;
534     INTEGER          ND;
535     const INTEGER    LDV    = nw;
536     const INTEGER    NH     = T.numCols();
537     const INTEGER    LDT    = T.leadingDimension();
538     const INTEGER    NV     = nw;
539     const INTEGER    LDWV   = nw;
540     ElementType      WORK;
541     const INTEGER    LWORK  = -1;
542     ElementType      DUMMY;
543 
544     if (IsSame<ElementType,DOUBLE>::value) {
545         LAPACK_IMPL(dlaqr2)(&WANTT,
546                             &WANTZ,
547                             &N,
548                             &KTOP,
549                             &KBOT,
550                             &NW,
551                             &DUMMY,
552                             &LDH,
553                             &ILOZ,
554                             &IHIZ,
555                             &DUMMY,
556                             &LDZ,
557                             &NS,
558                             &ND,
559                             &DUMMY,
560                             &DUMMY,
561                             &DUMMY,
562                             &LDV,
563                             &NH,
564                             &DUMMY,
565                             &LDT,
566                             &NV,
567                             &DUMMY,
568                             &LDWV,
569                             &WORK,
570                             &LWORK);
571     } else {
572         ASSERT(0);
573     }
574     return WORK;
575 }
576 
577 template <typename IndexType, typename MH, typename MZ, typename VSR,
578           typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
579 void
580 laqr2_native(bool                      wantT,
581              bool                      wantZ,
582              IndexType                 kTop,
583              IndexType                 kBot,
584              IndexType                 nw,
585              GeMatrix<MH>              &H,
586              IndexType                 iLoZ,
587              IndexType                 iHiZ,
588              GeMatrix<MZ>              &Z,
589              IndexType                 &ns,
590              IndexType                 &nd,
591              DenseVector<VSR>          &sr,
592              DenseVector<VSI>          &si,
593              GeMatrix<MV>              &V,
594              GeMatrix<MT>              &T,
595              GeMatrix<MWV>             &WV,
596              DenseVector<VWORK>        &work)
597 {
598     typedef typename GeMatrix<MH>::ElementType  ElementType;
599 
600     const LOGICAL    WANTT  = wantT;
601     const LOGICAL    WANTZ  = wantZ;
602     const INTEGER    N      = H.numRows();
603     const INTEGER    KTOP   = kTop;
604     const INTEGER    KBOT   = kBot;
605     const INTEGER    NW     = nw;
606     const INTEGER    LDH    = H.leadingDimension();
607     const INTEGER    ILOZ   = iLoZ;
608     const INTEGER    IHIZ   = iHiZ;
609     const INTEGER    LDZ    = Z.leadingDimension();
610     INTEGER          NS     = ns;
611     INTEGER          ND     = nd;
612     const INTEGER    LDV    = V.leadingDimension();
613     const INTEGER    NH     = T.numCols();
614     const INTEGER    LDT    = T.leadingDimension();
615     const INTEGER    NV     = WV.numRows();
616     const INTEGER    LDWV   = WV.leadingDimension();
617     const INTEGER    LWORK  = work.length();
618 
619     if (IsSame<ElementType,DOUBLE>::value) {
620         LAPACK_IMPL(dlaqr2)(&WANTT,
621                             &WANTZ,
622                             &N,
623                             &KTOP,
624                             &KBOT,
625                             &NW,
626                             H.data(),
627                             &LDH,
628                             &ILOZ,
629                             &IHIZ,
630                             Z.data(),
631                             &LDZ,
632                             &NS,
633                             &ND,
634                             sr.data(),
635                             si.data(),
636                             V.data(),
637                             &LDV,
638                             &NH,
639                             T.data(),
640                             &LDT,
641                             &NV,
642                             WV.data(),
643                             &LDWV,
644                             work.data(),
645                             &LWORK);
646     } else {
647         ASSERT(0);
648     }
649     ns = NS;
650     nd = ND;
651 }
652 
653 #endif // CHECK_CXXLAPACK
654 
655 //== public interface ==========================================================
656 template <typename IndexType, typename MT>
657 IndexType
658 laqr2_wsq(IndexType                 kTop,
659           IndexType                 kBot,
660           IndexType                 nw,
661           const GeMatrix<MT>        &T)
662 {
663     using std::max;
664 //
665 //  Test the input parameters
666 //
667 #   ifndef NDEBUG
668     ASSERT(T.firstRow()==1);
669     ASSERT(T.firstCol()==1);
670     ASSERT(T.numRows()==T.numCols());
671 #   endif
672 
673 //
674 //  Call implementation
675 //
676     IndexType info = laqr2_generic_wsq(kTop, kBot, nw, T);
677 
678 #   ifdef CHECK_CXXLAPACK
679 //
680 //  Compare results
681 //
682     IndexType _info = laqr2_native_wsq(kTop, kBot, nw, T);
683 
684     if (info!=_info) {
685         std::cerr << "CXXLAPACK:  info = " << info << std::endl;
686         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
687         ASSERT(0);
688     }
689 #   endif
690     return info;
691 
692 }
693 
694 template <typename IndexType, typename MH, typename MZ, typename VSR,
695           typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
696 void
697 laqr2(bool                      wantT,
698       bool                      wantZ,
699       IndexType                 kTop,
700       IndexType                 kBot,
701       IndexType                 nw,
702       GeMatrix<MH>              &H,
703       IndexType                 iLoZ,
704       IndexType                 iHiZ,
705       GeMatrix<MZ>              &Z,
706       IndexType                 &ns,
707       IndexType                 &nd,
708       DenseVector<VSR>          &sr,
709       DenseVector<VSI>          &si,
710       GeMatrix<MV>              &V,
711       GeMatrix<MT>              &T,
712       GeMatrix<MWV>             &WV,
713       DenseVector<VWORK>        &work)
714 {
715     LAPACK_DEBUG_OUT("laqr2");
716 
717     using std::max;
718 //
719 //  Test the input parameters
720 //
721 #   ifndef NDEBUG
722     ASSERT(H.firstRow()==1);
723     ASSERT(H.firstCol()==1);
724     ASSERT(H.numRows()==H.numCols());
725 
726     const IndexType n = H.numRows();
727 
728     if (wantZ) {
729         ASSERT(1<=iLoZ);
730         ASSERT(iLoZ<=iHiZ);
731         ASSERT(iHiZ<=n);
732 
733         ASSERT(Z.firstRow()==1);
734         ASSERT(Z.firstCol()==1);
735         ASSERT(Z.numRows()==n);
736         ASSERT(Z.numCols()==n);
737     }
738 
739     ASSERT(sr.length()==kBot);
740     ASSERT(si.length()==kBot);
741 
742     ASSERT(V.firstRow()==1);
743     ASSERT(V.firstCol()==1);
744     ASSERT(V.numRows()==nw);
745     ASSERT(V.numCols()==nw);
746 
747     const IndexType nh = T.numCols();
748     ASSERT(nh>=nw);
749 
750     ASSERT(T.firstRow()==1);
751     ASSERT(T.firstCol()==1);
752 
753     const IndexType nv = WV.numRows();
754     ASSERT(nv>=nw);
755 
756     ASSERT((work.length()==0) || (work.length()>=n));
757 #   endif
758 
759 //
760 //  Make copies of output arguments
761 //
762 #   ifdef CHECK_CXXLAPACK
763     typename GeMatrix<MH>::NoView       H_org      = H;
764     typename GeMatrix<MZ>::NoView       Z_org      = Z;
765     IndexType                           ns_org     = ns;
766     IndexType                           nd_org     = nd;
767     typename DenseVector<VSR>::NoView   sr_org     = sr;
768     typename DenseVector<VSI>::NoView   si_org     = si;
769     typename GeMatrix<MV>::NoView       V_org      = V;
770     typename GeMatrix<MT>::NoView       T_org      = T;
771     typename GeMatrix<MWV>::NoView      WV_org     = WV;
772     typename DenseVector<VWORK>::NoView work_org   = work;
773 #   endif
774 
775 //
776 //  Call implementation
777 //
778     laqr2_generic(wantT, wantZ, kTop, kBot, nw, H,
779                   iLoZ, iHiZ, Z, ns, nd, sr, si,
780                   V, T, WV, work);
781 
782 #   ifdef CHECK_CXXLAPACK
783 //
784 //  Make copies of results computed by the generic implementation
785 //
786     typename GeMatrix<MH>::NoView       H_generic      = H;
787     typename GeMatrix<MZ>::NoView       Z_generic      = Z;
788     IndexType                           ns_generic     = ns;
789     IndexType                           nd_generic     = nd;
790     typename DenseVector<VSR>::NoView   sr_generic     = sr;
791     typename DenseVector<VSI>::NoView   si_generic     = si;
792     typename GeMatrix<MV>::NoView       V_generic      = V;
793     typename GeMatrix<MT>::NoView       T_generic      = T;
794     typename GeMatrix<MWV>::NoView      WV_generic     = WV;
795     typename DenseVector<VWORK>::NoView work_generic   = work;
796 
797 //
798 //  restore output arguments
799 //
800     H      = H_org;
801     Z      = Z_org;
802     ns     = ns_org;
803     nd     = nd_org;
804     sr     = sr_org;
805     si     = si_org;
806     V      = V_org;
807     T      = T_org;
808     WV     = WV_org;
809     work   = work_org;
810 
811 //
812 //  Compare results
813 //
814     laqr2_native(wantT, wantZ, kTop, kBot, nw, H,
815                  iLoZ, iHiZ, Z, ns, nd, sr, si,
816                  V, T, WV, work);
817 
818     bool failed = false;
819     if (! isIdentical(H_generic, H, " H_generic""H")) {
820         std::cerr << "CXXLAPACK: H_generic = " << H_generic << std::endl;
821         std::cerr << "F77LAPACK: H = " << H << std::endl;
822         std::cerr << "Original:  H_org = " << H_org << std::endl;
823         failed = true;
824     }
825 
826     if (! isIdentical(Z_generic, Z, "Z_generic""Z")) {
827         std::cerr << "CXXLAPACK: Z_generic = " << Z_generic << std::endl;
828         std::cerr << "F77LAPACK: Z = " << Z << std::endl;
829         failed = true;
830     }
831 
832     if (! isIdentical(ns_generic, ns, "ns_generic""ns")) {
833         std::cerr << "CXXLAPACK: ns_generic = " << ns_generic << std::endl;
834         std::cerr << "F77LAPACK: ns = " << ns << std::endl;
835         failed = true;
836     }
837 
838     if (! isIdentical(nd_generic, nd, "nd_generic""nd")) {
839         std::cerr << "CXXLAPACK: nd_generic = " << nd_generic << std::endl;
840         std::cerr << "F77LAPACK: nd = " << nd << std::endl;
841         failed = true;
842     }
843 
844     if (! isIdentical(sr_generic, sr, "sr_generic""_sr")) {
845         std::cerr << "CXXLAPACK: sr_generic = " << sr_generic << std::endl;
846         std::cerr << "F77LAPACK: sr = " << sr << std::endl;
847         failed = true;
848     }
849 
850     if (! isIdentical(si_generic, si, "si_generic""si")) {
851         std::cerr << "CXXLAPACK: si_generic = " << si_generic << std::endl;
852         std::cerr << "F77LAPACK: si = " << si << std::endl;
853         failed = true;
854     }
855 
856     if (! isIdentical(V_generic, V, "V_generic""V")) {
857         std::cerr << "CXXLAPACK: V_generic = " << V_generic << std::endl;
858         std::cerr << "F77LAPACK: V = " << V << std::endl;
859         failed = true;
860     }
861 
862     if (! isIdentical(T_generic, T, "T_generic""T")) {
863         std::cerr << "CXXLAPACK: T_generic = " << T_generic << std::endl;
864         std::cerr << "F77LAPACK: T = " << T << std::endl;
865         failed = true;
866     }
867 
868     if (! isIdentical(WV_generic, WV, "WV_generic""_WV")) {
869         std::cerr << "CXXLAPACK: WV_generic = " << WV_generic << std::endl;
870         std::cerr << "F77LAPACK: WV = " << WV << std::endl;
871         failed = true;
872     }
873 
874     if (! isIdentical(work_generic, work, "work_generic""_work")) {
875         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
876         std::cerr << "F77LAPACK: work = " << work << std::endl;
877         failed = true;
878     }
879 
880     if (failed) {
881         std::cerr << "error in: laqr2.tcc" << std::endl;
882         ASSERT(0);
883     } else {
884 //        std::cerr << "passed: laqr2.tcc" << std::endl;
885     }
886 #   endif
887 }
888 
889 //-- forwarding ----------------------------------------------------------------
890 template <typename IndexType, typename MT>
891 IndexType
892 laqr2_wsq(IndexType                 kTop,
893           IndexType                 kBot,
894           IndexType                 nw,
895           const MT                  &&T)
896 {
897     CHECKPOINT_ENTER;
898     const IndexType info = laqr2_wsq(kTop, kBot, nw, T);
899     CHECKPOINT_LEAVE;
900 
901     return info;
902 }
903 
904 template <typename IndexType, typename MH, typename MZ, typename VSR,
905           typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
906 void
907 laqr2(bool                      wantT,
908       bool                      wantZ,
909       IndexType                 kTop,
910       IndexType                 kBot,
911       IndexType                 nw,
912       MH                        &&H,
913       IndexType                 iLoZ,
914       IndexType                 iHiZ,
915       MZ                        &&Z,
916       IndexType                 &ns,
917       IndexType                 &nd,
918       VSR                       &&sr,
919       VSI                       &&si,
920       MV                        &&V,
921       MT                        &&T,
922       MWV                       &&WV,
923       VWORK                     &&work)
924 {
925     CHECKPOINT_ENTER;
926     laqr2(wantT, wantZ, kTop, kBot, nw, H, iLoZ, iHiZ, Z, ns, nd, sr, si,
927           V, T, WV, work);
928     CHECKPOINT_LEAVE;
929 }
930 
931 } } // namespace lapack, flens
932 
933 #endif // FLENS_LAPACK_EIG_LAQR3_TCC