1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35       SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
 36      $                   IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
 37      $                   LDT, NV, WV, LDWV, WORK, LWORK )
 38  *
 39  *  -- LAPACK auxiliary routine (version 3.2.2)                        --
 40  *     Univ. of Tennessee, Univ. of California Berkeley,
 41  *     Univ. of Colorado Denver and NAG Ltd..
 42  *  -- June 2010                                                       --
 43  *
 44  */
 45 
 46 #ifndef FLENS_LAPACK_EIG_LAQR3_TCC
 47 #define FLENS_LAPACK_EIG_LAQR3_TCC 1
 48 
 49 #include <flens/blas/blas.h>
 50 #include <flens/lapack/lapack.h>
 51 
 52 namespace flens { namespace lapack {
 53 
 54 //== generic lapack implementation =============================================
 55 
 56 template <typename IndexType, typename MT>
 57 IndexType
 58 laqr3_generic_wsq(IndexType                 kTop,
 59                   IndexType                 kBot,
 60                   IndexType                 nw,
 61                   const GeMatrix<MT>        &T)
 62 {
 63     using std::max;
 64     using std::min;
 65 
 66     typedef typename GeMatrix<MT>::ElementType  ElementType;
 67 
 68     const Underscore<IndexType> _;
 69     
 70 //
 71 //  ==== Estimate optimal workspace. ====
 72 //
 73     IndexType jw = min(nw, kBot-kTop+1);
 74     auto  _T  = T(_(1,jw),_(1,jw));
 75 
 76     IndexType lWorkOpt;
 77     if (jw<=2) {
 78         lWorkOpt = 1;
 79     } else {
 80 //
 81 //      ==== Workspace query call to DGEHRD ====
 82 //
 83         IndexType lWork1 = hrd_wsq(IndexType(1), jw-1, _T);
 84 //
 85 //      ==== Workspace query call to DORMHR ====
 86 //
 87         IndexType lWork2 = ormhr_wsq(Right, NoTrans, IndexType(1), jw-1, _T);
 88 //
 89 //      ==== Workspace query call to DLAQR4 ====
 90 //
 91         IndexType lWork3 = laqr4_wsq(truetrue, IndexType(1), jw, _T);
 92 //
 93 //      ==== Optimal workspace ====
 94 //
 95         lWorkOpt = max(jw+max(lWork1,lWork2), lWork3);
 96     }
 97     return lWorkOpt;
 98 }
 99 
100 template <typename IndexType, typename MH, typename MZ, typename VSR,
101           typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
102 void
103 laqr3_generic(bool                      wantT,
104               bool                      wantZ,
105               IndexType                 kTop,
106               IndexType                 kBot,
107               IndexType                 nw,
108               GeMatrix<MH>              &H,
109               IndexType                 iLoZ,
110               IndexType                 iHiZ,
111               GeMatrix<MZ>              &Z,
112               IndexType                 &ns,
113               IndexType                 &nd,
114               DenseVector<VSR>          &sr,
115               DenseVector<VSI>          &si,
116               GeMatrix<MV>              &V,
117               GeMatrix<MT>              &T,
118               GeMatrix<MWV>             &WV,
119               DenseVector<VWORK>        &work)
120 {
121     using std::abs;
122     using std::max;
123     using std::min;
124 
125     typedef typename GeMatrix<MH>::ElementType  ElementType;
126 
127     const Underscore<IndexType> _;
128 
129     const ElementType   Zero(0), One(1);
130     const IndexType     n   = H.numRows();
131     const IndexType     nh  = T.numCols();
132     const IndexType     nv  = WV.numRows();
133 //
134 //  ==== Estimate optimal workspace. ====
135 //
136     IndexType   jw      = min(nw, kBot-kTop+1);
137     auto        T_jw    = T(_(1,jw),_(1,jw));
138     auto        V_jw    = V(_(1,jw),_(1,jw));
139 
140     IndexType lWorkOpt;
141     if (jw<=2) {
142         lWorkOpt = 1;
143     } else {
144 //
145 //      ==== Workspace query call to DGEHRD ====
146 //
147         IndexType lWork1 = hrd_wsq(IndexType(1), jw-1, T_jw);
148 //
149 //      ==== Workspace query call to DORMHR ====
150 //
151         IndexType lWork2 = ormhr_wsq(Right, NoTrans, IndexType(1), jw-1, V_jw);
152 //
153 //      ==== Workspace query call to DLAQR4 ====
154 //
155         IndexType lWork3 = laqr4_wsq(truetrue, IndexType(1), jw, T_jw);
156 //
157 //      ==== Optimal workspace ====
158 //
159         lWorkOpt = max(jw+max(lWork1,lWork2), lWork3);
160     }
161 //
162 //  ==== Apply worksize query ====
163 //
164     if (work.length()==0) {
165         work.resize(lWorkOpt);
166     }
167     const IndexType lWork = work.length();
168 //
169 //  ==== Nothing to do ...
170 //  ... for an empty active block ... ====
171     ns = 0;
172     nd = 0;
173     work(1) = One;
174     if (kTop>kBot) {
175         return;
176     }
177 //   ... nor for an empty deflation window. ====
178     if (nw<1) {
179         return;
180     }
181 //
182 //  ==== Machine constants ====
183 //
184     ElementType safeMin   = lamch<ElementType>(SafeMin);
185     ElementType safeMax   = One / safeMin;
186     labad(safeMin, safeMax);
187     const ElementType ulp      = lamch<ElementType>(Precision);
188     const ElementType smallNum = safeMin*(ElementType(n)/ulp);
189 //
190 //  ==== Setup deflation window ====
191 //
192     IndexType   kwTop   = kBot - jw + 1;
193     auto        sr_kw   = sr(_(kwTop,kBot));
194     auto        si_kw   = si(_(kwTop,kBot));
195     auto        H_kw    = H(_(kwTop,kBot),_(kwTop,kBot));
196     ElementType s;
197 
198     if( kwTop==kTop) {
199         s = Zero;
200     } else {
201         s = H(kwTop, kwTop-1);
202     }
203 
204     if (kBot==kwTop) {
205 //
206 //      ==== 1-by-1 deflation window: not much to do ====
207 //
208         sr(kwTop) = H(kwTop,kwTop);
209         si(kwTop) = Zero;
210         ns = 1;
211         nd = 0;
212         if (abs(s)<=max(smallNum,ulp*abs(H(kwTop,kwTop)))) {
213             ns = 0;
214             nd = 1;
215             if (kwTop>kTop) {
216                 H(kwTop, kwTop-1) = Zero;
217             }
218         }
219         work(1) = One;
220         return;
221     }
222 //
223 //  ==== Convert to spike-triangular form.  (In case of a
224 //  .    rare QR failure, this routine continues to do
225 //  .    aggressive early deflation using that part of
226 //  .    the deflation window that converged using INFQR
227 //  .    here and there to keep track.) ====
228 //
229     T_jw.upper()  = H_kw.upper();
230     T_jw.diag(-1) = H_kw.diag(-1);
231 
232     V_jw          = Zero;
233     V_jw.diag(0)  = One;
234 
235     IndexType nMin = ilaenv<ElementType>(12"LAQR3""SV", jw, 1, jw, lWork);
236     IndexType infoQR;
237     if (jw>nMin) {
238         infoQR = laqr4(truetrue,
239                        IndexType(1), jw, T_jw,
240                        sr_kw, si_kw,
241                        IndexType(1), jw, V_jw,
242                        work);
243     } else {
244         infoQR = lahqr(truetrue,
245                        IndexType(1), jw, T_jw,
246                        sr_kw, si_kw,
247                        IndexType(1), jw, V_jw);
248     }
249 //
250 //  ==== DTREXC needs a clean margin near the diagonal ====
251 //
252     for (IndexType j=1; j<=jw-3; ++j) {
253         T(j+2, j) = Zero;
254         T(j+3, j) = Zero;
255     }
256     if (jw>2) {
257         T(jw,jw-2) = Zero;
258     }
259 //
260 //  ==== Deflation detection loop ====
261 //
262     ns = jw;
263     IndexType iFirst;
264     IndexType iLast = infoQR + 1;
265     while (iLast<=ns) {
266         bool bulge;
267         if (ns==1) {
268             bulge = false;
269         } else {
270             bulge = T(ns,ns-1)!=Zero;
271         }
272 //
273 //      ==== Small spike tip test for deflation ====
274 //
275         if (!bulge) {
276 //
277 //          ==== Real eigenvalue ====
278 //
279             ElementType foo = abs(T(ns,ns));
280             if (foo==Zero) {
281                 foo = abs(s);
282             }
283             if (abs(s*V(1,ns))<=max(smallNum,ulp*foo)) {
284 //
285 //               ==== Deflatable ====
286 //
287                 ns = ns - 1;
288             } else {
289 //
290 //               ==== Undeflatable.   Move it up out of the way.
291 //               .    (DTREXC can not fail in this case.) ====
292 //
293                 iFirst = ns;
294                 trexc(true, T_jw, V_jw, iFirst, iLast, work(_(1,jw)));
295                 ++iLast;
296             }
297         } else {
298 //
299 //          ==== Complex conjugate pair ====
300 //
301             ElementType foo = abs(T(ns,ns))
302                             + sqrt(abs(T(ns,ns-1)))*sqrt(abs(T(ns-1,ns)));
303             if (foo==Zero) {
304                 foo = abs(s);
305             }
306             if (max(abs(s*V(1,ns)), abs(s*V(1,ns-1)))<=max(smallNum,ulp*foo)) {
307 //
308 //              ==== Deflatable ====
309 //
310                 ns -= 2;
311             } else {
312 //
313 //              ==== Undeflatable. Move them up out of the way.
314 //              .    Fortunately, DTREXC does the right thing with
315 //              .    ILST in case of a rare exchange failure. ====
316 //
317                 iFirst = ns;
318                 trexc(true, T_jw, V_jw, iFirst, iLast, work(_(1,jw)));
319                 iLast += 2;
320             }
321         }
322 //
323 //      ==== End deflation detection loop ====
324 //
325     }
326 //
327 //     ==== Return to Hessenberg form ====
328 //
329     if (ns==0) {
330         s = Zero;
331     }
332 
333     if (ns<jw) {
334 //
335 //     ==== sorting diagonal blocks of T improves accuracy for
336 //     .    graded matrices.  Bubble sort deals well with
337 //     .    exchange failures. ====
338 //
339         bool sorted = false;
340         IndexType i = ns + 1;
341         while (!sorted) {
342             sorted = true;
343 
344             IndexType kEnd = i - 1;
345             i = infoQR + 1;
346 
347             IndexType k;
348             if (i==ns) {
349                 k = i + 1;
350             } else if (T(i+1,i)==Zero) {
351                 k = i + 1;
352             } else {
353                 k = i + 2;
354             }
355             while (k<=kEnd) {
356                 ElementType evi, evk;
357 
358                 if (k==i+1) {
359                     evi = abs(T(i,i));
360                 } else {
361                     evi = abs(T(i,i)) + sqrt(abs(T(i+1,i)))*sqrt(abs(T(i,i+1)));
362                 }
363 
364                 if (k==kEnd) {
365                     evk = abs(T(k,k));
366                 } else if(T(k+1,k)==Zero) {
367                     evk = abs(T(k,k));
368                 } else {
369                     evk = abs(T(k,k)) + sqrt(abs(T(k+1,k)))*sqrt(abs(T(k,k+1)));
370                 }
371 
372                 if (evi>=evk) {
373                     i = k;
374                 } else {
375                     sorted = false;
376                     iFirst = i;
377                     iLast = k;
378                     IndexType info = trexc(true, T_jw, V_jw, iFirst, iLast,
379                                            work(_(1,jw)));
380                     if (info==0) {
381                         i = iLast;
382                     } else {
383                         i = k;
384                     }
385                 }
386                 if (i==kEnd) {
387                     k = i + 1;
388                 } else if (T(i+1,i)==Zero) {
389                     k = i + 1;
390                 } else {
391                     k = i + 2;
392                 }
393             }
394         }
395     }
396 //
397 //  ==== Restore shift/eigenvalue array from T ====
398 //
399     IndexType i = jw;
400     while (i>=infoQR+1) {
401         if (i==infoQR+1) {
402             sr(kwTop+i-1) = T(i,i);
403             si(kwTop+i-1) = Zero;
404             i = i - 1;
405         } else if (T(i,i-1)==Zero) {
406             sr(kwTop+i-1 ) = T(i,i);
407             si(kwTop+i-1 ) = Zero;
408             i = i - 1;
409         } else {
410             ElementType aa = T(i-1,i-1);
411             ElementType cc = T(i,  i-1);
412             ElementType bb = T(i-1,i  );
413             ElementType dd = T(i,  i  );
414             ElementType cs, sn;
415             lanv2(aa, bb, cc, dd,
416                   sr(kwTop+i-2), si(kwTop+i-2),
417                   sr(kwTop+i-1), si(kwTop+i-1),
418                   cs, sn);
419             i -= 2;
420         }
421     }
422 
423     if (ns<jw || s==Zero) {
424         if (ns>1 && s!=Zero) {
425 //
426 //          ==== Reflect spike back into lower triangle ====
427 //
428             work(_(1,ns)) = V(1,_(1,ns));
429             ElementType beta = work(1);
430             ElementType tau;
431 
432             larfg(ns, beta, work(_(2,ns)), tau);
433             work(1) = One;
434 
435             T(_(3,jw),_(1,jw-2)).lower() = Zero;
436 
437             const auto _v = work(_(1,ns));
438 
439             larf(Left,  _v, tau, T(_(1,ns),_(1,jw)), work(_(jw+1,jw+jw)));
440             larf(Right, _v, tau, T(_(1,ns),_(1,ns)), work(_(jw+1,jw+ns)));
441             larf(Right, _v, tau, V(_(1,jw),_(1,ns)), work(_(jw+1,jw+jw)));
442 
443             hrd(IndexType(1), ns, T_jw, work(_(1,jw-1)), work(_(jw+1,lWork)));
444         }
445 //
446 //      ==== Copy updated reduced window into place ====
447 //
448         if (kwTop>1) {
449             H(kwTop,kwTop-1) = s*V(1,1);
450         }
451         H_kw.upper()  = T_jw.upper();
452         H_kw.diag(-1) = T_jw.diag(-1);
453 //
454 //      ==== Accumulate orthogonal matrix in order update
455 //      .    H and Z, if requested.  ====
456 //
457         if (ns>1 && s!=Zero) {
458             ormhr(Right, NoTrans,
459                   IndexType(1), ns,
460                   T(_(1,ns),_(1,ns)),
461                   work(_(1,ns-1)),
462                   V(_(1,jw),_(1,ns)),
463                   work(_(jw+1,lWork)));
464         }
465 //
466 //      ==== Update vertical slab in H ====
467 //
468         const IndexType lTop = (wantT) ? 1 : kTop;
469 
470         for (IndexType kRow=lTop; kRow<=kwTop-1; kRow+=nv) {
471             const IndexType kLn = min(nv,kwTop-kRow);
472             blas::mm(NoTrans, NoTrans, One,
473                      H(_(kRow,kRow+kLn-1),_(kwTop,kBot)),
474                      V(_(1,jw),_(1,jw)),
475                      Zero,
476                      WV(_(1,kLn),_(1,jw)));
477             H(_(kRow,kRow+kLn-1),_(kwTop,kBot)) = WV(_(1,kLn),_(1,jw));
478         }
479 //
480 //      ==== Update horizontal slab in H ====
481 //
482         if (wantT) {
483             for (IndexType kCol=kBot+1; kCol<=n; kCol+=nh) {
484                 const IndexType kLn = min(nh,n-kCol+1);
485                 blas::mm(ConjTrans, NoTrans, One,
486                          V(_(1,jw),_(1,jw)),
487                          H(_(kwTop,kBot),_(kCol,kCol+kLn-1)),
488                          Zero,
489                          T(_(1,jw),_(1,kLn)));
490                 H(_(kwTop,kBot),_(kCol,kCol+kLn-1)) = T(_(1,jw),_(1,kLn));
491             }
492         }
493 //
494 //      ==== Update vertical slab in Z ====
495 //
496         if (wantZ) {
497             for (IndexType kRow=iLoZ; kRow<=iHiZ; kRow+=nv) {
498                 const IndexType kLn = min(nv,iHiZ-kRow+1);
499                 blas::mm(NoTrans, NoTrans, One,
500                          Z(_(kRow,kRow+kLn-1),_(kwTop,kBot)),
501                          V(_(1,jw),_(1,jw)),
502                          Zero,
503                          WV(_(1,kLn),_(1,jw)));
504                 Z(_(kRow,kRow+kLn-1), _(kwTop,kBot)) = WV(_(1,kLn),_(1,jw));
505             }
506         }
507     }
508 //
509 //  ==== Return the number of deflations ... ====
510 //
511     nd = jw - ns;
512 //
513 //  ==== ... and the number of shifts. (Subtracting
514 //  .    INFQR from the spike length takes care
515 //  .    of the case of a rare QR failure while
516 //  .    calculating eigenvalues of the deflation
517 //  .    window.)  ====
518 //
519     ns = ns - infoQR;
520 //
521 //   ==== Return optimal workspace. ====
522 //
523     work(1) = lWorkOpt;
524 }
525 
526 //== interface for native lapack ===============================================
527 
528 #ifdef CHECK_CXXLAPACK
529 
530 template <typename IndexType, typename MT>
531 IndexType
532 laqr3_native_wsq(IndexType                 kTop,
533                  IndexType                 kBot,
534                  IndexType                 nw,
535                  const GeMatrix<MT>        &T)
536 {
537     typedef typename GeMatrix<MT>::ElementType  ElementType;
538 
539     const LOGICAL    WANTT  = false;
540     const LOGICAL    WANTZ  = false;
541     const INTEGER    N      = 1;
542     const INTEGER    KTOP   = kTop;
543     const INTEGER    KBOT   = kBot;
544     const INTEGER    NW     = nw;
545     const INTEGER    LDH    = 1;
546     const INTEGER    ILOZ   = 0;
547     const INTEGER    IHIZ   = 0;
548     const INTEGER    LDZ    = 1;
549     INTEGER          NS;
550     INTEGER          ND;
551     const INTEGER    LDV    = nw;
552     const INTEGER    NH     = T.numCols();
553     const INTEGER    LDT    = T.leadingDimension();
554     const INTEGER    NV     = nw;
555     const INTEGER    LDWV   = nw;
556     ElementType      WORK;
557     const INTEGER    LWORK  = -1;
558     ElementType      DUMMY;
559 
560     if (IsSame<ElementType,DOUBLE>::value) {
561         LAPACK_IMPL(dlaqr3)(&WANTT,
562                             &WANTZ,
563                             &N,
564                             &KTOP,
565                             &KBOT,
566                             &NW,
567                             &DUMMY,
568                             &LDH,
569                             &ILOZ,
570                             &IHIZ,
571                             &DUMMY,
572                             &LDZ,
573                             &NS,
574                             &ND,
575                             &DUMMY,
576                             &DUMMY,
577                             &DUMMY,
578                             &LDV,
579                             &NH,
580                             &DUMMY,
581                             &LDT,
582                             &NV,
583                             &DUMMY,
584                             &LDWV,
585                             &WORK,
586                             &LWORK);
587     } else {
588         ASSERT(0);
589     }
590     return WORK;
591 }
592 
593 template <typename IndexType, typename MH, typename MZ, typename VSR,
594           typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
595 void
596 laqr3_native(bool                      wantT,
597              bool                      wantZ,
598              IndexType                 kTop,
599              IndexType                 kBot,
600              IndexType                 nw,
601              GeMatrix<MH>              &H,
602              IndexType                 iLoZ,
603              IndexType                 iHiZ,
604              GeMatrix<MZ>              &Z,
605              IndexType                 &ns,
606              IndexType                 &nd,
607              DenseVector<VSR>          &sr,
608              DenseVector<VSI>          &si,
609              GeMatrix<MV>              &V,
610              GeMatrix<MT>              &T,
611              GeMatrix<MWV>             &WV,
612              DenseVector<VWORK>        &work)
613 {
614     typedef typename GeMatrix<MH>::ElementType  ElementType;
615 
616     const LOGICAL    WANTT  = wantT;
617     const LOGICAL    WANTZ  = wantZ;
618     const INTEGER    N      = H.numRows();
619     const INTEGER    KTOP   = kTop;
620     const INTEGER    KBOT   = kBot;
621     const INTEGER    NW     = nw;
622     const INTEGER    LDH    = H.leadingDimension();
623     const INTEGER    ILOZ   = iLoZ;
624     const INTEGER    IHIZ   = iHiZ;
625     const INTEGER    LDZ    = Z.leadingDimension();
626     INTEGER          NS     = ns;
627     INTEGER          ND     = nd;
628     const INTEGER    LDV    = V.leadingDimension();
629     const INTEGER    NH     = T.numCols();
630     const INTEGER    LDT    = T.leadingDimension();
631     const INTEGER    NV     = WV.numRows();
632     const INTEGER    LDWV   = WV.leadingDimension();
633     const INTEGER    LWORK  = work.length();
634 
635     if (IsSame<ElementType,DOUBLE>::value) {
636         LAPACK_IMPL(dlaqr3)(&WANTT,
637                             &WANTZ,
638                             &N,
639                             &KTOP,
640                             &KBOT,
641                             &NW,
642                             H.data(),
643                             &LDH,
644                             &ILOZ,
645                             &IHIZ,
646                             Z.data(),
647                             &LDZ,
648                             &NS,
649                             &ND,
650                             sr.data(),
651                             si.data(),
652                             V.data(),
653                             &LDV,
654                             &NH,
655                             T.data(),
656                             &LDT,
657                             &NV,
658                             WV.data(),
659                             &LDWV,
660                             work.data(),
661                             &LWORK);
662     } else {
663         ASSERT(0);
664     }
665     ns = NS;
666     nd = ND;
667 }
668 
669 #endif // CHECK_CXXLAPACK
670 
671 //== public interface ==========================================================
672 template <typename IndexType, typename MT>
673 IndexType
674 laqr3_wsq(IndexType                 kTop,
675           IndexType                 kBot,
676           IndexType                 nw,
677           const GeMatrix<MT>        &T)
678 {
679     using std::max;
680 //
681 //  Test the input parameters
682 //
683 #   ifndef NDEBUG
684     ASSERT(T.firstRow()==1);
685     ASSERT(T.firstCol()==1);
686     ASSERT(T.numRows()==T.numCols());
687 #   endif
688 
689 //
690 //  Call implementation
691 //
692     // TODO: call generic implementation
693     IndexType info = laqr3_generic_wsq(kTop, kBot, nw, T);
694 
695 #   ifdef CHECK_CXXLAPACK
696 //
697 //  Compare results
698 //
699     IndexType _info = laqr3_native_wsq(kTop, kBot, nw, T);
700 
701     if (info!=_info) {
702         std::cerr << "CXXLAPACK:  info = " << info << std::endl;
703         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
704         ASSERT(0);
705     }
706 #   endif
707     return info;
708 
709 }
710 
711 template <typename IndexType, typename MH, typename MZ, typename VSR,
712           typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
713 void
714 laqr3(bool                      wantT,
715       bool                      wantZ,
716       IndexType                 kTop,
717       IndexType                 kBot,
718       IndexType                 nw,
719       GeMatrix<MH>              &H,
720       IndexType                 iLoZ,
721       IndexType                 iHiZ,
722       GeMatrix<MZ>              &Z,
723       IndexType                 &ns,
724       IndexType                 &nd,
725       DenseVector<VSR>          &sr,
726       DenseVector<VSI>          &si,
727       GeMatrix<MV>              &V,
728       GeMatrix<MT>              &T,
729       GeMatrix<MWV>             &WV,
730       DenseVector<VWORK>        &work)
731 {
732     LAPACK_DEBUG_OUT("laqr3");
733 
734     using std::max;
735 //
736 //  Test the input parameters
737 //
738 #   ifndef NDEBUG
739     ASSERT(H.firstRow()==1);
740     ASSERT(H.firstCol()==1);
741     ASSERT(H.numRows()==H.numCols());
742 
743     const IndexType n = H.numRows();
744 
745     if (wantZ) {
746         ASSERT(1<=iLoZ);
747         ASSERT(iLoZ<=iHiZ);
748         ASSERT(iHiZ<=n);
749 
750         ASSERT(Z.firstRow()==1);
751         ASSERT(Z.firstCol()==1);
752         ASSERT(Z.numRows()==n);
753         ASSERT(Z.numCols()==n);
754     }
755 
756     ASSERT(sr.length()==kBot);
757     ASSERT(si.length()==kBot);
758 
759     ASSERT(V.firstRow()==1);
760     ASSERT(V.firstCol()==1);
761     ASSERT(V.numRows()==nw);
762     ASSERT(V.numCols()==nw);
763 
764     const IndexType nh = T.numCols();
765     ASSERT(nh>=nw);
766 
767     ASSERT(T.firstRow()==1);
768     ASSERT(T.firstCol()==1);
769 
770     const IndexType nv = WV.numRows();
771     ASSERT(nv>=nw);
772 
773     ASSERT((work.length()==0) || (work.length()>=n));
774 #   endif
775 
776 //
777 //  Make copies of output arguments
778 //
779 #   ifdef CHECK_CXXLAPACK
780     typename GeMatrix<MH>::NoView       H_org      = H;
781     typename GeMatrix<MZ>::NoView       Z_org      = Z;
782     IndexType                           ns_org     = ns;
783     IndexType                           nd_org     = nd;
784     typename DenseVector<VSR>::NoView   sr_org     = sr;
785     typename DenseVector<VSI>::NoView   si_org     = si;
786     typename GeMatrix<MV>::NoView       V_org      = V;
787     typename GeMatrix<MT>::NoView       T_org      = T;
788     typename GeMatrix<MWV>::NoView      WV_org     = WV;
789     typename DenseVector<VWORK>::NoView work_org   = work;
790 #   endif
791 
792 //
793 //  Call implementation
794 //
795     laqr3_generic(wantT, wantZ, kTop, kBot, nw, H,
796                   iLoZ, iHiZ, Z, ns, nd, sr, si,
797                   V, T, WV, work);
798 
799 #   ifdef CHECK_CXXLAPACK
800 //
801 //  Make copies of results computed by the generic implementation
802 //
803     typename GeMatrix<MH>::NoView       H_generic      = H;
804     typename GeMatrix<MZ>::NoView       Z_generic      = Z;
805     IndexType                           ns_generic     = ns;
806     IndexType                           nd_generic     = nd;
807     typename DenseVector<VSR>::NoView   sr_generic     = sr;
808     typename DenseVector<VSI>::NoView   si_generic     = si;
809     typename GeMatrix<MV>::NoView       V_generic      = V;
810     typename GeMatrix<MT>::NoView       T_generic      = T;
811     typename GeMatrix<MWV>::NoView      WV_generic     = WV;
812     typename DenseVector<VWORK>::NoView work_generic   = work;
813 
814 //
815 //  restore output arguments
816 //
817     H       = H_org;
818     Z       = Z_org;
819     ns      = ns_org;
820     nd      = nd_org;
821     sr      = sr_org;
822     si      = si_org;
823     V       = V_org;
824     T       = T_org;
825     WV      = WV_org;
826     work    = work_org;
827 
828 //
829 //  Compare generic results with results from the native implementation
830 //
831     laqr3_native(wantT, wantZ, kTop, kBot, nw, H,
832                  iLoZ, iHiZ, Z, ns, nd, sr, si,
833                  V, T, WV, work);
834 
835     bool failed = false;
836     if (! isIdentical(H_generic, H, "H_generic""H")) {
837         std::cerr << "CXXLAPACK: H_generic = " << H_generic << std::endl;
838         std::cerr << "F77LAPACK: H = " << H << std::endl;
839         std::cerr << "Original:  H_org = " << H_org << std::endl;
840         failed = true;
841     }
842 
843     if (! isIdentical(Z_generic, Z, "Z_generic""Z")) {
844         std::cerr << "CXXLAPACK: Z = " << Z_generic << std::endl;
845         std::cerr << "F77LAPACK: Z = " << Z << std::endl;
846         failed = true;
847     }
848 
849     if (! isIdentical(ns_generic, ns, "ns_generic""ns")) {
850         std::cerr << "CXXLAPACK: ns_generic = " << ns_generic << std::endl;
851         std::cerr << "F77LAPACK: ns = " << ns << std::endl;
852         failed = true;
853     }
854 
855     if (! isIdentical(nd_generic, nd, "nd_generic""nd")) {
856         std::cerr << "CXXLAPACK: nd_generic = " << nd_generic << std::endl;
857         std::cerr << "F77LAPACK: nd = " << nd << std::endl;
858         failed = true;
859     }
860 
861     if (! isIdentical(sr_generic, sr, "sr_generic""sr")) {
862         std::cerr << "CXXLAPACK: sr_generic = " << sr_generic << std::endl;
863         std::cerr << "F77LAPACK: sr = " << sr << std::endl;
864         failed = true;
865     }
866 
867     if (! isIdentical(si_generic, si, "si_generic""si")) {
868         std::cerr << "CXXLAPACK: si_generic = " << si_generic << std::endl;
869         std::cerr << "F77LAPACK: si = " << si << std::endl;
870         failed = true;
871     }
872 
873     if (! isIdentical(V_generic, V, " V_generic""V")) {
874         std::cerr << "CXXLAPACK: V_generic = " << V_generic << std::endl;
875         std::cerr << "F77LAPACK: V = " << V << std::endl;
876         failed = true;
877     }
878 
879     if (! isIdentical(T_generic, T, "T_generic""T")) {
880         std::cerr << "CXXLAPACK: T_generic = " << T_generic << std::endl;
881         std::cerr << "F77LAPACK: T = " << T << std::endl;
882         failed = true;
883     }
884 
885     if (! isIdentical(WV_generic, WV, "WV_generic""WV")) {
886         std::cerr << "CXXLAPACK: WV_generic = " << WV_generic << std::endl;
887         std::cerr << "F77LAPACK: WV = " << WV << std::endl;
888         failed = true;
889     }
890 
891     if (! isIdentical(work_generic, work, "work_generic""work")) {
892         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
893         std::cerr << "F77LAPACK: work = " << work << std::endl;
894         failed = true;
895     }
896 
897     if (failed) {
898         std::cerr << "error in: laqr3.tcc" << std::endl;
899         ASSERT(0);
900     } else {
901 //        std::cerr << "passed: laqr3.tcc" << std::endl;
902     }
903 #   endif
904 }
905 
906 //-- forwarding ----------------------------------------------------------------
907 template <typename IndexType, typename MT>
908 IndexType
909 laqr3_wsq(IndexType                 kTop,
910           IndexType                 kBot,
911           IndexType                 nw,
912           const MT                  &&T)
913 {
914     CHECKPOINT_ENTER;
915     const IndexType info = laqr3_wsq(kTop, kBot, nw, T);
916     CHECKPOINT_LEAVE;
917 
918     return info;
919 }
920 
921 template <typename IndexType, typename MH, typename MZ, typename VSR,
922           typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
923 void
924 laqr3(bool                      wantT,
925       bool                      wantZ,
926       IndexType                 kTop,
927       IndexType                 kBot,
928       IndexType                 nw,
929       MH                        &&H,
930       IndexType                 iLoZ,
931       IndexType                 iHiZ,
932       MZ                        &&Z,
933       IndexType                 &ns,
934       IndexType                 &nd,
935       VSR                       &&sr,
936       VSI                       &&si,
937       MV                        &&V,
938       MT                        &&T,
939       MWV                       &&WV,
940       VWORK                     &&work)
941 {
942     CHECKPOINT_ENTER;
943     laqr3(wantT, wantZ, kTop, kBot, nw, H, iLoZ, iHiZ, Z, ns, nd, sr, si,
944           V, T, WV, work);
945     CHECKPOINT_LEAVE;
946 }
947 
948 } } // namespace lapack, flens
949 
950 #endif // FLENS_LAPACK_EIG_LAQR3_TCC