1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DLAQR0( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI,
 36       $                   ILOZ, IHIZ, Z, LDZ, WORK, LWORK, INFO )
 37  *
 38  *  -- LAPACK auxiliary routine (version 3.2) --
 39  *     Univ. of Tennessee, Univ. of California Berkeley,
 40  *     Univ. of Colorado Denver and NAG Ltd..
 41  *     November 2006
 42  */
 43 
 44 #ifndef FLENS_LAPACK_EIG_LAQR0_TCC
 45 #define FLENS_LAPACK_EIG_LAQR0_TCC 1
 46 
 47 #include <flens/blas/blas.h>
 48 #include <flens/lapack/lapack.h>
 49 
 50 namespace flens { namespace lapack {
 51 
 52 //== generic lapack implementation =============================================
 53 
 54 template <typename IndexType, typename MH>
 55 IndexType
 56 laqr0_generic_wsq(bool                  wantT,
 57                   bool                  wantZ,
 58                   IndexType             iLo,
 59                   IndexType             iHi,
 60                   const GeMatrix<MH>    &H)
 61 {
 62     using std::max;
 63     using std::min;
 64 
 65     typedef typename GeMatrix<MH>::ElementType  T;
 66 
 67     const IndexType nTiny   = 11;
 68     const IndexType n       = H.numRows();
 69 
 70     if ((n==0) || (n<=nTiny)) {
 71         return 1;
 72     }
 73 
 74     char job[3];
 75     job[0] = (wantT) ? 'S' : 'E';
 76     job[1] = (wantZ) ? 'V' : 'N';
 77     job[2] = 0;
 78 //  
 79 //  ==== NWR = recommended deflation window size.  At this
 80 //  .    point,  N .GT. NTINY = 11, so there is enough
 81 //  .    subdiagonal workspace for NWR.GE.2 as required.
 82 //  .    (In fact, there is enough subdiagonal space for
 83 //  .    NWR.GE.3.) ====
 84 //  
 85     IndexType nwr = ilaenv<T>(13"LAQR0", job, n, iLo, iHi, -1);
 86     nwr = max(IndexType(2), nwr);
 87     nwr = min(min(IndexType(iHi-iLo+1), (n-1)/3), nwr);
 88 //
 89 //  ==== NSR = recommended number of simultaneous shifts.
 90 //  .    At this point N .GT. NTINY = 11, so there is at
 91 //  .    enough subdiagonal workspace for NSR to be even
 92 //  .    and greater than or equal to two as required. ====
 93 //
 94     IndexType nsr = ilaenv<T>(15"LAQR0", job, n, iLo, iHi, -1);
 95     nsr = min(min(nsr, (n+6)/9), IndexType(iHi-iLo));
 96     nsr = max(IndexType(2), nsr-(nsr%2));
 97 //
 98 //  ==== Estimate optimal workspace ====
 99 //
100 //  ==== Workspace query call to DLAQR3 ====
101 //
102     IndexType lWorkOpt = laqr3_wsq(IndexType(iLo), IndexType(iHi), nwr+1, H);
103 //
104 //  ==== Optimal workspace = MAX(DLAQR5, DLAQR3) ====
105 //
106     return max(3*nsr/2, lWorkOpt);
107 }
108 
109 template <typename IndexType, typename MH, typename VWR, typename VWI,
110           typename MZ, typename VWORK>
111 IndexType
112 laqr0_generic(bool                  wantT,
113               bool                  wantZ,
114               IndexType             iLo,
115               IndexType             iHi,
116               GeMatrix<MH>          &H,
117               DenseVector<VWR>      &wr,
118               DenseVector<VWI>      &wi,
119               IndexType             iLoZ,
120               IndexType             iHiZ,
121               GeMatrix<MZ>          &Z,
122               DenseVector<VWORK>    &work)
123 {
124     using std::abs;
125     using std::max;
126     using std::min;
127     using std::swap;
128 
129     typedef typename GeMatrix<MH>::ElementType  T;
130 
131     const Underscore<IndexType>     _;
132 
133     const IndexType n       = H.numRows();
134 
135 //  ==== Matrices of order NTINY or smaller must be processed by
136 //  .    DLAHQR because of insufficient subdiagonal scratch space.
137 //  .    (This is a hard limit.) ====
138     const IndexType nTiny   = 11;
139 
140 //  ==== Exceptional deflation windows:  try to cure rare
141 //  .    slow convergence by varying the size of the
142 //  .    deflation window after KEXNW iterations. ====
143     const IndexType kexNw   = 5;
144 
145 //
146 //  ==== Exceptional shifts: try to cure rare slow convergence
147 //  .    with ad-hoc exceptional shifts every KEXSH iterations.
148 //  .    ====
149     const IndexType kexSh   = 6;
150 
151 //
152 //  ==== The constants WILK1 and WILK2 are used to form the
153 //  .    exceptional shifts. ====
154     const T         wilk1 = T(0.75),
155                     wilk2 = T(-0.4375);
156 
157     const T         Zero(0), One(1);
158 
159     IndexType info          = 0;
160     IndexType lWork, lWorkOpt;
161 
162     IndexType nDec          = -1;
163 
164 //
165 //  ==== Perform and apply a workspace query if necessary ====
166 //
167     if (work.length()==0) {
168         lWorkOpt = laqr0_generic_wsq(wantT, wantZ, iLo, iHi, H);
169         work.resize(lWorkOpt);
170     }
171 
172     lWork = work.length();
173 
174 //
175 //  ==== Quick return for N = 0: nothing to do. ====
176 //
177     if (n==0) {
178         work(1) = One;
179         return info;
180     }
181 
182     if (n<=nTiny) {
183 //
184 //      ==== Tiny matrices must use DLAHQR. ====
185 //
186         lWorkOpt = 1;
187         info = lahqr(wantT, wantZ, iLo, iHi, H, wr, wi, iLoZ, iHiZ, Z);
188     } else {
189 //
190 //      ==== Use small bulge multi-shift QR with aggressive early
191 //      .    deflation on larger-than-tiny matrices. ====
192 //
193 //      ==== Hope for the best. ====
194 //
195         info = 0;
196 //
197 //      ==== Set up job flags for ILAENV. ====
198 //
199         char job[3];
200         job[0] = (wantT) ? 'S' : 'E';
201         job[1] = (wantZ) ? 'V' : 'N';
202         job[2] = 0;
203 //
204 //      ==== NWR = recommended deflation window size.  At this
205 //      .    point,  N .GT. NTINY = 11, so there is enough
206 //      .    subdiagonal workspace for NWR.GE.2 as required.
207 //      .    (In fact, there is enough subdiagonal space for
208 //      .    NWR.GE.3.) ====
209 //
210         IndexType nwr = ilaenv<T>(13"LAQR0", job, n, iLo, iHi, lWork);
211         nwr = max(IndexType(2), nwr);
212         nwr = min(min(IndexType(iHi-iLo+1), (n-1)/3), nwr);
213 //
214 //      ==== NSR = recommended number of simultaneous shifts.
215 //      .    At this point N .GT. NTINY = 11, so there is at
216 //      .    enough subdiagonal workspace for NSR to be even
217 //      .    and greater than or equal to two as required. ====
218 //
219         IndexType nsr = ilaenv<T>(15"LAQR0", job, n, iLo, iHi, lWork);
220         nsr = min(min(nsr, (n+6)/9), IndexType(iHi-iLo));
221         nsr = max(IndexType(2), nsr - (nsr%2));
222 //
223 //      ==== Estimate optimal workspace ====
224 //
225 //      ==== Workspace query call to DLAQR3 ====
226 //
227         lWorkOpt = laqr3_wsq(IndexType(iLo), IndexType(iHi), nwr+1, H);
228 //
229 //      ==== Optimal workspace = MAX(DLAQR5, DLAQR3) ====
230 //
231         lWorkOpt = max(3*nsr/2, lWorkOpt);
232 //
233 //      ==== DLAHQR/DLAQR0 crossover point ====
234 //
235         IndexType nMin = ilaenv<T>(12"LAQR0", job, n, iLo, iHi, lWork);
236         nMin = max(nTiny, nMin);
237 //
238 //      ==== Nibble crossover point ====
239 //
240         IndexType nibble = ilaenv<T>(14"LAQR0", job, n, iLo, iHi, lWork);
241         nibble = max(IndexType(0), nibble);
242 //
243 //      ==== Accumulate reflections during ttswp?  Use block
244 //      .    2-by-2 structure during matrix-matrix multiply? ====
245 //
246         IndexType kacc22 = ilaenv<T>(16"LAQR0", job, n, iLo, iHi, lWork);
247         kacc22 = max(IndexType(0), kacc22);
248         kacc22 = min(IndexType(2), kacc22);
249 //
250 //      ==== NWMAX = the largest possible deflation window for
251 //      .    which there is sufficient workspace. ====
252 //
253         IndexType nwMax = min((n-1)/3, lWork/2);
254         IndexType nw = nwMax;
255 //
256 //      ==== NSMAX = the Largest number of simultaneous shifts
257 //      .    for which there is sufficient workspace. ====
258 //
259         IndexType nsMax = min((n+6 )/92*lWork/3);
260         nsMax = nsMax - (nsMax%2);
261 //
262 //      ==== NDFL: an iteration count restarted at deflation. ====
263 //
264         IndexType nDfl = 1;
265 //
266 //      ==== ITMAX = iteration limit ====
267 //
268         IndexType itMax = max(IndexType(30), 2*kexSh)
269                         * max(IndexType(10), IndexType(iHi-iLo+1));
270 //
271 //      ==== Last row and column in the active block ====
272 //
273         IndexType kBot = iHi;
274 //
275 //      ==== Main Loop ====
276 //
277         IndexType it;
278         for (it=1; it<=itMax; ++it) {
279 //
280 //          ==== Done when KBOT falls below ILO ====
281 //
282             if (kBot<iLo) {
283                 break;
284             }
285 //
286 //          ==== Locate active block ====
287 //
288             IndexType k;
289             for (k=kBot; k>=iLo+1; --k) {
290                 if (H(k,k-1)==Zero) {
291                     break;
292                 }
293             }
294             ASSERT(k==iLo || H(k,k-1)==Zero);
295             const IndexType kTop = k;
296 // 
297 //          ==== Select deflation window size:
298 //          .    Typical Case:
299 //          .      If possible and advisable, nibble the entire
300 //          .      active block.  If not, use size MIN(NWR,NWMAX)
301 //          .      or MIN(NWR+1,NWMAX) depending upon which has
302 //          .      the smaller corresponding subdiagonal entry
303 //          .      (a heuristic).
304 //          .
305 //          .    Exceptional Case:
306 //          .      If there have been no deflations in KEXNW or
307 //          .      more iterations, then vary the deflation window
308 //          .      size.   At first, because, larger windows are,
309 //          .      in general, more powerful than smaller ones,
310 //          .      rapidly increase the window to the maximum possible.
311 //          .      Then, gradually reduce the window size. ====
312 // 
313             IndexType nh = kBot - kTop + 1;
314             IndexType nwUpBd = min(nh, nwMax);
315             if (nDfl<kexNw) {
316                 nw = min(nwUpBd, nwr);
317             } else {
318                 nw = min(nwUpBd, 2*nw);
319             }
320             if (nw<nwMax) {
321                 if (nw>=nh-1) {
322                     nw = nh;
323                 } else {
324                     const IndexType kwTop = kBot - nw + 1;
325                     if (abs(H(kwTop,kwTop-1))>abs(H(kwTop-1, kwTop-2))) {
326                         ++nw;
327                     }
328                 }
329             }
330             if (nDfl<kexNw) {
331                 nDec = -1;
332             } else if (nDec>=0 || nw>=nwUpBd) {
333                 ++nDec;
334                 if (nw-nDec<2) {
335                     nDec = 0;
336                 }
337                 nw -= nDec;
338             }
339 // 
340 //          ==== Aggressive early deflation:
341 //          .    split workspace under the subdiagonal into
342 //          .      - an nw-by-nw work array V in the lower
343 //          .        left-hand-corner,
344 //          .      - an NW-by-at-least-NW-but-more-is-better
345 //          .        (NW-by-NHO) horizontal work array along
346 //          .        the bottom edge,
347 //          .      - an at-least-NW-but-more-is-better (NHV-by-NW)
348 //          .        vertical work array along the left-hand-edge.
349 //          .        ====
350 // 
351             auto _V     = H(_(n-nw+1,    n), _(   1,     nw));
352             auto _T     = H(_(n-nw+1,    n), _(nw+1, n-nw-1));
353             auto _WV    = H(_(  nw+2, n-nw), _(   1,     nw));
354 //
355 //          ==== Aggressive early deflation ====
356 //
357             IndexType   ls, ld;
358 
359             laqr3(wantT, wantZ, kTop, kBot, nw, H,
360                   IndexType(iLoZ), IndexType(iHiZ), Z,
361                   ls, ld, wr(_(1,kBot)), wi(_(1,kBot)),
362                   _V, _T, _WV, work);
363 //
364 //          ==== Adjust KBOT accounting for new deflations. ====
365 //
366             kBot -= ld;
367 //
368 //          ==== KS points to the shifts. ====
369 //
370             IndexType ks = kBot - ls + 1;
371 //
372 //          ==== Skip an expensive QR sweep if there is a (partly
373 //          .    heuristic) reason to expect that many eigenvalues
374 //          .    will deflate without it.  Here, the QR sweep is
375 //          .    skipped if many eigenvalues have just been deflated
376 //          .    or if the remaining active block is small.
377 //
378             if ((ld==0)
379              || ((100*ld<=nw*nibble) && (kBot-kTop+1>min(nMin, nwMax))))
380             {
381 //
382 //              ==== NS = nominal number of simultaneous shifts.
383 //              .    This may be lowered (slightly) if DLAQR3
384 //              .    did not provide that many shifts. ====
385 //
386                 IndexType ns = min(min(nsMax, nsr),
387                                    max(IndexType(2), kBot-kTop));
388                 ns = ns - (ns % 2);
389 //
390 //              ==== If there have been no deflations
391 //              .    in a multiple of KEXSH iterations,
392 //              .    then try exceptional shifts.
393 //              .    Otherwise use shifts provided by
394 //              .    DLAQR3 above or from the eigenvalues
395 //              .    of a trailing principal submatrix. ====
396 //
397                 if (nDfl%kexSh==0) {
398                     ks = kBot - ns + 1;
399                     for (IndexType i=kBot; i>=max(ks+1,kTop+2); i-=2) {
400                         const T ss = abs(H(i,i-1)) + abs(H(i-1,i-2));
401                         T aa = wilk1*ss + H(i,i);
402                         T bb = ss;
403                         T cc = wilk2*ss;
404                         T dd = aa;
405                         T cs, sn;
406                         lanv2(aa, bb, cc, dd,
407                               wr(i-1), wi(i-1), wr(i), wi(i),
408                               cs, sn);
409                     }
410                     if (ks==kTop) {
411                         wr(ks+1)    = H(ks+1, ks+1);
412                         wi(ks+1)    = Zero;
413                         wr(ks)      = wr(ks+1);
414                         wi(ks)      = wi(ks+1);
415                     }
416                 } else {
417 //
418 //                  ==== Got NS/2 or fewer shifts? Use DLAQR4 or
419 //                  .    DLAHQR on a trailing principal submatrix to
420 //                  .    get more. (Since NS.LE.NSMAX.LE.(N+6)/9,
421 //                  .    there is enough space below the subdiagonal
422 //                  .    to fit an NS-by-NS scratch array.) ====
423 //
424                     if (kBot-ks+1<=ns/2) {
425                         ks = kBot - ns +1;
426                         H(_(ks,kBot),_(1,ns)) = H(_(ks,kBot),_(ks,kBot));
427                         if (ns>nMin) {
428                             // TODO: avoid the need for ZDummy
429                             typename GeMatrix<MZ>::NoView ZDummy;
430                             ks += laqr4(falsefalse,
431                                         IndexType(1), ns,
432                                         H(_(ks,kBot),_(1,ns)),
433                                         wr(_(ks,kBot)), wi(_(ks,kBot)),
434                                         IndexType(1), IndexType(1),
435                                         ZDummy, work);
436                         } else {
437                             // TODO: avoid the need for ZDummy
438                             typename GeMatrix<MZ>::NoView ZDummy;
439                             ks += lahqr(falsefalse,
440                                         IndexType(1), ns,
441                                         H(_(ks,kBot),_(1,ns)),
442                                         wr(_(ks,kBot)), wi(_(ks,kBot)),
443                                         IndexType(1), IndexType(1),
444                                         ZDummy);
445                         }
446 //
447 //                      ==== In case of a rare QR failure use
448 //                      .    eigenvalues of the trailing 2-by-2
449 //                      .    principal submatrix.  ====
450 //
451                         if (ks>=kBot) {
452                             T aa = H(kBot-1,kBot-1);
453                             T cc = H(kBot,  kBot-1);
454                             T bb = H(kBot-1,kBot);
455                             T dd = H(kBot,  kBot);
456                             T cs, sn;
457                             lanv2(aa, bb, cc, dd,
458                                   wr(kBot-1), wi(kBot-1), wr(kBot), wi(kBot),
459                                   cs, sn);
460                             ks = kBot - 1;
461                         }
462                     }
463 
464                     if (kBot-ks+1>ns) {
465 //
466 //                      ==== Sort the shifts (Helps a little)
467 //                      .    Bubble sort keeps complex conjugate
468 //                      .    pairs together. ====
469 //
470                         bool sorted = false;
471                         for (IndexType k=kBot; k>=ks+1; --k) {
472                             if (sorted) {
473                                 break;
474                             }
475                             sorted = true;
476                             for (IndexType i=ks; i<=k-1; ++i) {
477                                 if (abs(wr(i))+abs(wi(i))
478                                     < abs(wr(i+1))+abs(wi(i+1)))
479                                 {
480                                     sorted = false;
481                                     swap(wr(i), wr(i+1));
482                                     swap(wi(i), wi(i+1));
483                                 }
484                             }
485                         }
486                     }
487 //
488 //                  ==== Shuffle shifts into pairs of real shifts
489 //                  .    and pairs of complex conjugate shifts
490 //                  .    assuming complex conjugate shifts are
491 //                  .    already adjacent to one another. (Yes,
492 //                  .    they are.)  ====
493 //
494                     for (IndexType i=kBot; i>=ks+2; i-=2) {
495                         if (wi(i)!=-wi(i-1)) {
496                             T tmp  = wr(i);
497                             wr(i)   = wr(i-1);
498                             wr(i-1) = wr(i-2);
499                             wr(i-2) = tmp;
500 
501                             tmp     = wi(i);
502                             wi(i)   = wi(i-1);
503                             wi(i-1) = wi(i-2);
504                             wi(i-2) = tmp;
505                         }
506                     }
507                 }
508 //
509 //              ==== If there are only two shifts and both are
510 //              .    real, then use only one.  ====
511 //
512                 if (kBot-ks+1==2) {
513                     if (wi(kBot)==0) {
514                         const T _H = H(kBot,kBot);
515                         if (abs(wr(kBot)-_H) < abs(wr(kBot-1)-_H)) {
516                             wr(kBot-1) = wr(kBot);
517                         } else {
518                             wr(kBot) = wr(kBot-1);
519                         }
520                     }
521                 }
522 //
523 //              ==== Use up to NS of the the smallest magnatiude
524 //              .    shifts.  If there aren't NS shifts available,
525 //              .    then use them all, possibly dropping one to
526 //              .    make the number of shifts even. ====
527 //
528                 ns = min(ns, kBot-ks+1);
529                 ns = ns - (ns%2);
530                 ks = kBot - ns + 1;
531 //
532 //              ==== Small-bulge multi-shift QR sweep:
533 //              .    split workspace under the subdiagonal into
534 //              .    - a KDU-by-KDU work array U in the lower
535 //              .      left-hand-corner,
536 //              .    - a KDU-by-at-least-KDU-but-more-is-better
537 //              .      (KDU-by-NHo) horizontal work array WH along
538 //              .      the bottom edge,
539 //              .    - and an at-least-KDU-but-more-is-better-by-KDU
540 //              .      (NVE-by-KDU) vertical work WV arrow along
541 //              .      the left-hand-edge. ====
542 //
543                 IndexType kdu   = 3*ns - 3;
544                 IndexType ku    = n - kdu + 1;
545                 IndexType kmv   = kdu + 4;
546                 IndexType nho   = (n-kdu+1-4) - (kdu+1) + 1;
547 
548                 typedef typename GeMatrix<MH>::View GeMatrixView;
549                 GeMatrixView _V(IndexType(3), ns/2, work(_(1,3*ns/2)));
550                 auto _U     = H(_( ku,    n), _(    1,    kdu));
551                 auto _WV    = H(_(kmv,n-kdu), _(    1,    kdu));
552                 auto _WH    = H(_( ku,    n), _(kdu+1,kdu+nho));
553 //
554 //              ==== Small-bulge multi-shift QR sweep ====
555 //
556                 laqr5(wantT, wantZ, kacc22, kTop, kBot, ns,
557                       wr(_(ks,kBot)), wi(_(ks,kBot)), H,
558                       IndexType(iLoZ), IndexType(iHiZ), Z,
559                       _V, _U, _WV, _WH);
560             }
561 //
562 //          ==== Note progress (or the lack of it). ====
563 //
564             if (ld>0) {
565                 nDfl = 1;
566             } else {
567                 ++nDfl;
568             }
569 //
570 //          ==== End of main loop ====
571 //
572         }
573 //
574 //      ==== Iteration limit exceeded.  Set INFO to show where
575 //      .    the problem occurred and exit. ====
576 //
577         if (it>itMax) {
578             info = kBot;
579         }
580     }
581 
582     work(1) = lWorkOpt;
583     return info;
584 }
585 
586 //== interface for native lapack ===============================================
587 
588 #ifdef CHECK_CXXLAPACK
589 
590 template <typename IndexType, typename MH>
591 IndexType
592 laqr0_native_wsq(bool                  wantT,
593                  bool                  wantZ,
594                  IndexType             iLo,
595                  IndexType             iHi,
596                  const GeMatrix<MH>    &H)
597 {
598     typedef typename GeMatrix<MH>::ElementType  T;
599 
600     const LOGICAL   WANTT   = wantT;
601     const LOGICAL   WANTZ   = wantZ;
602     const INTEGER   N       = H.numRows();
603     const INTEGER   ILO     = iLo;
604     const INTEGER   IHI     = iHi;
605     const INTEGER   LDH     = H.leadingDimension();
606     const INTEGER   ILOZ    = 0;
607     const INTEGER   IHIZ    = 0;
608     const INTEGER   LDZ     = 0;
609     T               WORK;
610     T               DUMMY;
611     const INTEGER   LWORK   = -1;
612     INTEGER         INFO;
613 
614     if (IsSame<T,DOUBLE>::value) {
615         LAPACK_IMPL(dlaqr0)(&WANTT,
616                             &WANTZ,
617                             &N,
618                             &ILO,
619                             &IHI,
620                             &DUMMY,
621                             &LDH,
622                             &DUMMY,
623                             &DUMMY,
624                             &ILOZ,
625                             &IHIZ,
626                             &DUMMY,
627                             &LDZ,
628                             &WORK,
629                             &LWORK,
630                             &INFO);
631     } else {
632         ASSERT(0);
633     }
634     return WORK;
635 }
636 
637 template <typename IndexType, typename MH, typename VWR, typename VWI,
638           typename MZ, typename VWORK>
639 IndexType
640 laqr0_native(bool                  wantT,
641              bool                  wantZ,
642              IndexType             iLo,
643              IndexType             iHi,
644              GeMatrix<MH>          &H,
645              DenseVector<VWR>      &wr,
646              DenseVector<VWI>      &wi,
647              IndexType             iLoZ,
648              IndexType             iHiZ,
649              GeMatrix<MZ>          &Z,
650              DenseVector<VWORK>    &work)
651 {
652     typedef typename GeMatrix<MH>::ElementType  T;
653 
654     const LOGICAL   WANTT   = wantT;
655     const LOGICAL   WANTZ   = wantZ;
656     const INTEGER   N       = H.numRows();
657     const INTEGER   ILO     = iLo;
658     const INTEGER   IHI     = iHi;
659     const INTEGER   LDH     = H.leadingDimension();
660     const INTEGER   ILOZ    = iLoZ;
661     const INTEGER   IHIZ    = iHiZ;
662     const INTEGER   LDZ     = Z.leadingDimension();
663     const INTEGER   LWORK   = work.length();
664     INTEGER         INFO;
665 
666     if (IsSame<T,DOUBLE>::value) {
667         LAPACK_IMPL(dlaqr0)(&WANTT,
668                             &WANTZ,
669                             &N,
670                             &ILO,
671                             &IHI,
672                             H.data(),
673                             &LDH,
674                             wr.data(),
675                             wi.data(),
676                             &ILOZ,
677                             &IHIZ,
678                             Z.data(),
679                             &LDZ,
680                             work.data(),
681                             &LWORK,
682                             &INFO);
683     } else {
684         ASSERT(0);
685     }
686     ASSERT(INFO>=0);
687     return INFO;
688 }
689 
690 #endif // CHECK_CXXLAPACK
691 
692 //== public interface ==========================================================
693 template <typename IndexType, typename MH>
694 IndexType
695 laqr0_wsq(bool                  wantT,
696           bool                  wantZ,
697           IndexType             iLo,
698           IndexType             iHi,
699           const GeMatrix<MH>    &H)
700 {
701     using std::max;
702 //
703 //  Test the input parameters
704 //
705 #   ifndef NDEBUG
706     ASSERT(H.firstRow()==1);
707     ASSERT(H.firstCol()==1);
708     ASSERT(H.numRows()==H.numCols());
709 
710     const IndexType n = H.numRows();
711 
712     if (n>0) {
713         ASSERT(1<=iLo);
714         ASSERT(iLo<=iHi);
715         ASSERT(iHi<=n);
716     } else {
717         ASSERT(iLo==1);
718         ASSERT(iHi==0);
719     }
720 #   endif
721 
722 //
723 //  Call implementation
724 //
725     IndexType info = laqr0_generic_wsq(wantT, wantZ, iLo, iHi, H);
726 
727 #   ifdef CHECK_CXXLAPACK
728 //
729 //  Compare results
730 //
731     IndexType _info = laqr0_native_wsq(wantT, wantZ, iLo, iHi, H);
732 
733     if (info!=_info) {
734         std::cerr << "CXXLAPACK:  info = " << info << std::endl;
735         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
736         ASSERT(0);
737     }
738 #   endif
739     return info;
740 }
741 
742 template <typename IndexType, typename MH, typename VWR, typename VWI,
743           typename MZ, typename VWORK>
744 IndexType
745 laqr0(bool                  wantT,
746       bool                  wantZ,
747       IndexType             iLo,
748       IndexType             iHi,
749       GeMatrix<MH>          &H,
750       DenseVector<VWR>      &wr,
751       DenseVector<VWI>      &wi,
752       IndexType             iLoZ,
753       IndexType             iHiZ,
754       GeMatrix<MZ>          &Z,
755       DenseVector<VWORK>    &work)
756 {
757     LAPACK_DEBUG_OUT("laqr0");
758 
759     using std::max;
760 //
761 //  Test the input parameters
762 //
763 #   ifndef NDEBUG
764     ASSERT(H.firstRow()==1);
765     ASSERT(H.firstCol()==1);
766     ASSERT(H.numRows()==H.numCols());
767 
768     const IndexType n = H.numRows();
769 
770     if (n>0) {
771         ASSERT(1<=iLo);
772         ASSERT(iLo<=iHi);
773         ASSERT(iHi<=n);
774     } else {
775         ASSERT(iLo==1);
776         ASSERT(iHi==0);
777     }
778 
779     ASSERT(wr.firstIndex()==1);
780     ASSERT(wr.length()>=iHi);
781 
782     ASSERT(wi.firstIndex()==1);
783     ASSERT(wi.length()>=iHi);
784 
785     ASSERT(1<=iLoZ);
786     ASSERT(iLoZ<=iLo);
787     ASSERT(iHi<=iHiZ);
788     ASSERT(iHiZ<=n);
789 
790     ASSERT(Z.firstRow()==1);
791     ASSERT(Z.firstCol()==1);
792     ASSERT(Z.numRows()>=iHi);
793     ASSERT(Z.numCols()>=iHi);
794 
795     ASSERT((work.length()==0) || (work.length()>=n));
796 #   endif
797 
798 //
799 //  Make copies of output arguments
800 //
801 #   ifdef CHECK_CXXLAPACK
802     typename GeMatrix<MH>::NoView          H_org       = H;
803     typename DenseVector<VWR>::NoView      wr_org      = wr;
804     typename DenseVector<VWI>::NoView      wi_org      = wi;
805     typename GeMatrix<MZ>::NoView          Z_org       = Z;
806     typename DenseVector<VWORK>::NoView    work_org    = work;
807 #   endif
808 
809 //
810 //  Call implementation
811 //
812     IndexType info = laqr0_generic(wantT, wantZ, iLo, iHi, H, wr, wi,
813                                    iLoZ, iHiZ, Z, work);
814 
815 #   ifdef CHECK_CXXLAPACK
816 //
817 //  Make copies of results computed by the generic implementation
818 //
819     typename GeMatrix<MH>::NoView          H_generic       = H;
820     typename DenseVector<VWR>::NoView      wr_generic      = wr;
821     typename DenseVector<VWI>::NoView      wi_generic      = wi;
822     typename GeMatrix<MZ>::NoView          Z_generic       = Z;
823     typename DenseVector<VWORK>::NoView    work_generic    = work;
824 //
825 //  restore output arguments
826 //
827     H = H_org;
828     wr = wr_org;
829     wi = wi_org;
830     Z = Z_org;
831     work = work_org;
832 //
833 //  Compare generic results with results from the native implementation
834 //
835     IndexType _info = laqr0_native(wantT, wantZ, iLo, iHi, H, wr, wi,
836                                    iLoZ, iHiZ, Z, work);
837 
838     bool failed = false;
839     if (! isIdentical(H_generic, H, "H_generic""H")) {
840         std::cerr << "CXXLAPACK: H_generic = " << H_generic << std::endl;
841         std::cerr << "F77LAPACK: H = " << H << std::endl;
842         failed = true;
843     }
844 
845     if (! isIdentical(wr_generic, wr, "wr_generic""wr")) {
846         std::cerr << "CXXLAPACK: wr_generic = " << wr_generic << std::endl;
847         std::cerr << "F77LAPACK: wr = " << wr << std::endl;
848         failed = true;
849     }
850 
851     if (! isIdentical(wi_generic, wi, "wi_generic""wi")) {
852         std::cerr << "CXXLAPACK: wi_generic = " << wi_generic << std::endl;
853         std::cerr << "F77LAPACK: wi = " << wi << std::endl;
854         failed = true;
855     }
856 
857     if (! isIdentical(Z_generic, Z, "Z_generic""Z")) {
858         std::cerr << "CXXLAPACK: Z_generic = " << Z_generic << std::endl;
859         std::cerr << "F77LAPACK: Z = " << Z << std::endl;
860         failed = true;
861     }
862 
863     if (! isIdentical(info, _info, " info""_info")) {
864         std::cerr << "CXXLAPACK:  info = " << info << std::endl;
865         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
866         failed = true;
867     }
868 
869     if (! isIdentical(work_generic, work, "work_generic""work")) {
870         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
871         std::cerr << "F77LAPACK: work = " << work << std::endl;
872         failed = true;
873     }
874 
875     if (failed) {
876         std::cerr << "error in: laqr0.tcc" << std::endl;
877         ASSERT(0);
878     } else {
879         // std::cerr << "passed: laqr0.tcc" << std::endl;
880     }
881 #   endif
882 
883     return info;
884 }
885 
886 //-- forwarding ----------------------------------------------------------------
887 template <typename IndexType, typename MH>
888 IndexType
889 laqr0_wsq(bool              wantT,
890           bool              wantZ,
891           IndexType         iLo,
892           IndexType         iHi,
893           const MH          &&H)
894 {
895     CHECKPOINT_ENTER;
896     const IndexType info = laqr0_wsq(wantT, wantZ, iLo, iHi, H);
897     CHECKPOINT_LEAVE;
898 
899     return info;
900 }
901 
902 template <typename IndexType, typename MH, typename VWR, typename VWI,
903           typename MZ, typename VWORK>
904 IndexType
905 laqr0(bool                  wantT,
906       bool                  wantZ,
907       IndexType             iLo,
908       IndexType             iHi,
909       MH                    &&H,
910       VWR                   &&wr,
911       VWI                   &&wi,
912       IndexType             iLoZ,
913       IndexType             iHiZ,
914       MZ                    &&Z,
915       VWORK                 &&work)
916 {
917     CHECKPOINT_ENTER;
918     const IndexType info = laqr0(wantT, wantZ, iLo, iHi, H, wr, wi,
919                                  iLoZ, iHiZ, Z, work);
920     CHECKPOINT_LEAVE;
921 
922     return info;
923 }
924 
925 } } // namespace lapack, flens
926 
927 #endif // FLENS_LAPACK_EIG_LAQR0_TCC