1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
36 $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
37 $ LDT, NV, WV, LDWV, WORK, LWORK )
38 *
39 * -- LAPACK auxiliary routine (version 3.2.2) --
40 * Univ. of Tennessee, Univ. of California Berkeley,
41 * Univ. of Colorado Denver and NAG Ltd..
42 * -- June 2010 --
43 *
44 */
45
46 #ifndef FLENS_LAPACK_EIG_LAQR2_TCC
47 #define FLENS_LAPACK_EIG_LAQR2_TCC 1
48
49 #include <flens/blas/blas.h>
50 #include <flens/lapack/lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename IndexType, typename MT>
57 IndexType
58 laqr2_generic_wsq(IndexType kTop,
59 IndexType kBot,
60 IndexType nw,
61 const GeMatrix<MT> &T)
62 {
63 using std::max;
64 using std::min;
65
66 typedef typename GeMatrix<MT>::ElementType ElementType;
67
68 const Underscore<IndexType> _;
69
70 //
71 // ==== Estimate optimal workspace. ====
72 //
73 IndexType jw = min(nw, kBot-kTop+1);
74 auto _T = T(_(1,jw),_(1,jw));
75
76 IndexType lWorkOpt;
77 if (jw<=2) {
78 lWorkOpt = 1;
79 } else {
80 //
81 // ==== Workspace query call to DGEHRD ====
82 //
83 IndexType lWork1 = hrd_wsq(IndexType(1), jw-1, _T);
84 //
85 // ==== Workspace query call to DORMHR ====
86 //
87 IndexType lWork2 = ormhr_wsq(Right, NoTrans, IndexType(1), jw-1, _T);
88 //
89 // ==== Optimal workspace ====
90 //
91 lWorkOpt = jw+max(lWork1,lWork2);
92 }
93 return lWorkOpt;
94 }
95
96 template <typename IndexType, typename MH, typename MZ, typename VSR,
97 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
98 void
99 laqr2_generic(bool wantT,
100 bool wantZ,
101 IndexType kTop,
102 IndexType kBot,
103 IndexType nw,
104 GeMatrix<MH> &H,
105 IndexType iLoZ,
106 IndexType iHiZ,
107 GeMatrix<MZ> &Z,
108 IndexType &ns,
109 IndexType &nd,
110 DenseVector<VSR> &sr,
111 DenseVector<VSI> &si,
112 GeMatrix<MV> &V,
113 GeMatrix<MT> &T,
114 GeMatrix<MWV> &WV,
115 DenseVector<VWORK> &work)
116 {
117 using std::abs;
118 using std::max;
119 using std::min;
120
121 typedef typename GeMatrix<MH>::ElementType ElementType;
122
123 const Underscore<IndexType> _;
124
125 const ElementType Zero(0), One(1);
126 const IndexType n = H.numRows();
127 const IndexType nh = T.numCols();
128 const IndexType nv = WV.numRows();
129 //
130 // ==== Estimate optimal workspace. ====
131 //
132 IndexType jw = min(nw, kBot-kTop+1);
133 auto T_jw = T(_(1,jw),_(1,jw));
134 auto V_jw = V(_(1,jw),_(1,jw));
135
136 IndexType lWorkOpt;
137 if (jw<=2) {
138 lWorkOpt = 1;
139 } else {
140 //
141 // ==== Workspace query call to DGEHRD ====
142 //
143 IndexType lWork1 = hrd_wsq(IndexType(1), jw-1, T_jw);
144 //
145 // ==== Workspace query call to DORMHR ====
146 //
147 IndexType lWork2 = ormhr_wsq(Right, NoTrans, IndexType(1), jw-1, V_jw);
148 //
149 // ==== Optimal workspace ====
150 //
151 lWorkOpt = jw+max(lWork1,lWork2);
152 }
153 //
154 // ==== Apply worksize query ====
155 //
156 if (work.length()==0) {
157 work.resize(lWorkOpt);
158 }
159 const IndexType lWork = work.length();
160 //
161 // ==== Nothing to do ...
162 // ... for an empty active block ... ====
163 ns = 0;
164 nd = 0;
165 work(1) = One;
166 if (kTop>kBot) {
167 return;
168 }
169 // ... nor for an empty deflation window. ====
170 if (nw<1) {
171 return;
172 }
173 //
174 // ==== Machine constants ====
175 //
176 ElementType safeMin = lamch<ElementType>(SafeMin);
177 ElementType safeMax = One / safeMin;
178 labad(safeMin, safeMax);
179 const ElementType ulp = lamch<ElementType>(Precision);
180 const ElementType smallNum = safeMin*(ElementType(n)/ulp);
181 //
182 // ==== Setup deflation window ====
183 //
184 IndexType kwTop = kBot - jw + 1;
185 auto sr_kw = sr(_(kwTop,kBot));
186 auto si_kw = si(_(kwTop,kBot));
187 auto H_kw = H(_(kwTop,kBot),_(kwTop,kBot));
188 ElementType s;
189
190 if( kwTop==kTop) {
191 s = Zero;
192 } else {
193 s = H(kwTop, kwTop-1);
194 }
195
196 if (kBot==kwTop) {
197 //
198 // ==== 1-by-1 deflation window: not much to do ====
199 //
200 sr(kwTop) = H(kwTop,kwTop);
201 si(kwTop) = Zero;
202 ns = 1;
203 nd = 0;
204 if (abs(s)<=max(smallNum,ulp*abs(H(kwTop,kwTop)))) {
205 ns = 0;
206 nd = 1;
207 if (kwTop>kTop) {
208 H(kwTop, kwTop-1) = Zero;
209 }
210 }
211 work(1) = One;
212 return;
213 }
214 //
215 // ==== Convert to spike-triangular form. (In case of a
216 // . rare QR failure, this routine continues to do
217 // . aggressive early deflation using that part of
218 // . the deflation window that converged using INFQR
219 // . here and there to keep track.) ====
220 //
221 T_jw.upper() = H_kw.upper();
222 T_jw.diag(-1) = H_kw.diag(-1);
223
224 V_jw = Zero;
225 V_jw.diag(0) = One;
226
227 IndexType infoQR;
228 infoQR = lahqr(true, true,
229 IndexType(1), jw, T_jw,
230 sr_kw, si_kw,
231 IndexType(1), jw, V_jw);
232 //
233 // ==== DTREXC needs a clean margin near the diagonal ====
234 //
235 for (IndexType j=1; j<=jw-3; ++j) {
236 T(j+2, j) = Zero;
237 T(j+3, j) = Zero;
238 }
239 if (jw>2) {
240 T(jw,jw-2) = Zero;
241 }
242 //
243 // ==== Deflation detection loop ====
244 //
245 ns = jw;
246 IndexType iFirst;
247 IndexType iLast = infoQR + 1;
248 while (iLast<=ns) {
249 bool bulge;
250 if (ns==1) {
251 bulge = false;
252 } else {
253 bulge = T(ns,ns-1)!=Zero;
254 }
255 //
256 // ==== Small spike tip test for deflation ====
257 //
258 if (!bulge) {
259 //
260 // ==== Real eigenvalue ====
261 //
262 ElementType foo = abs(T(ns,ns));
263 if (foo==Zero) {
264 foo = abs(s);
265 }
266 if (abs(s*V(1,ns))<=max(smallNum,ulp*foo)) {
267 //
268 // ==== Deflatable ====
269 //
270 ns = ns - 1;
271 } else {
272 //
273 // ==== Undeflatable. Move it up out of the way.
274 // . (DTREXC can not fail in this case.) ====
275 //
276 iFirst = ns;
277 trexc(true, T_jw, V_jw, iFirst, iLast, work(_(1,jw)));
278 ++iLast;
279 }
280 } else {
281 //
282 // ==== Complex conjugate pair ====
283 //
284 ElementType foo = abs(T(ns,ns))
285 + sqrt(abs(T(ns,ns-1)))*sqrt(abs(T(ns-1,ns)));
286 if (foo==Zero) {
287 foo = abs(s);
288 }
289 if (max(abs(s*V(1,ns)), abs(s*V(1,ns-1)))<=max(smallNum,ulp*foo)) {
290 //
291 // ==== Deflatable ====
292 //
293 ns -= 2;
294 } else {
295 //
296 // ==== Undeflatable. Move them up out of the way.
297 // . Fortunately, DTREXC does the right thing with
298 // . ILST in case of a rare exchange failure. ====
299 //
300 iFirst = ns;
301 trexc(true, T_jw, V_jw, iFirst, iLast, work(_(1,jw)));
302 iLast += 2;
303 }
304 }
305 //
306 // ==== End deflation detection loop ====
307 //
308 }
309 //
310 // ==== Return to Hessenberg form ====
311 //
312 if (ns==0) {
313 s = Zero;
314 }
315
316 if (ns<jw) {
317 //
318 // ==== sorting diagonal blocks of T improves accuracy for
319 // . graded matrices. Bubble sort deals well with
320 // . exchange failures. ====
321 //
322 bool sorted = false;
323 IndexType i = ns + 1;
324 while (!sorted) {
325 sorted = true;
326
327 IndexType kEnd = i - 1;
328 i = infoQR + 1;
329
330 IndexType k;
331 if (i==ns) {
332 k = i + 1;
333 } else if (T(i+1,i)==Zero) {
334 k = i + 1;
335 } else {
336 k = i + 2;
337 }
338 while (k<=kEnd) {
339 ElementType evi, evk;
340
341 if (k==i+1) {
342 evi = abs(T(i,i));
343 } else {
344 evi = abs(T(i,i)) + sqrt(abs(T(i+1,i)))*sqrt(abs(T(i,i+1)));
345 }
346
347 if (k==kEnd) {
348 evk = abs(T(k,k));
349 } else if(T(k+1,k)==Zero) {
350 evk = abs(T(k,k));
351 } else {
352 evk = abs(T(k,k)) + sqrt(abs(T(k+1,k)))*sqrt(abs(T(k,k+1)));
353 }
354
355 if (evi>=evk) {
356 i = k;
357 } else {
358 sorted = false;
359 iFirst = i;
360 iLast = k;
361 IndexType info = trexc(true, T_jw, V_jw, iFirst, iLast,
362 work(_(1,jw)));
363 if (info==0) {
364 i = iLast;
365 } else {
366 i = k;
367 }
368 }
369 if (i==kEnd) {
370 k = i + 1;
371 } else if (T(i+1,i)==Zero) {
372 k = i + 1;
373 } else {
374 k = i + 2;
375 }
376 }
377 }
378 }
379 //
380 // ==== Restore shift/eigenvalue array from T ====
381 //
382 IndexType i = jw;
383 while (i>=infoQR+1) {
384 if (i==infoQR+1) {
385 sr(kwTop+i-1) = T(i,i);
386 si(kwTop+i-1) = Zero;
387 i = i - 1;
388 } else if (T(i,i-1)==Zero) {
389 sr(kwTop+i-1 ) = T(i,i);
390 si(kwTop+i-1 ) = Zero;
391 i = i - 1;
392 } else {
393 ElementType aa = T(i-1,i-1);
394 ElementType cc = T(i, i-1);
395 ElementType bb = T(i-1,i );
396 ElementType dd = T(i, i );
397 ElementType cs, sn;
398 lanv2(aa, bb, cc, dd,
399 sr(kwTop+i-2), si(kwTop+i-2),
400 sr(kwTop+i-1), si(kwTop+i-1),
401 cs, sn);
402 i -= 2;
403 }
404 }
405
406 if (ns<jw || s==Zero) {
407
408 if (ns>1 && s!=Zero) {
409 //
410 // ==== Reflect spike back into lower triangle ====
411 //
412 work(_(1,ns)) = V(1,_(1,ns));
413 ElementType beta = work(1);
414 ElementType tau;
415
416 larfg(ns, beta, work(_(2,ns)), tau);
417 work(1) = One;
418
419 T(_(3,jw),_(1,jw-2)).lower() = Zero;
420
421 const auto _v = work(_(1,ns));
422
423 larf(Left, _v, tau, T(_(1,ns),_(1,jw)), work(_(jw+1,jw+jw)));
424 larf(Right, _v, tau, T(_(1,ns),_(1,ns)), work(_(jw+1,jw+ns)));
425 larf(Right, _v, tau, V(_(1,jw),_(1,ns)), work(_(jw+1,jw+jw)));
426
427 hrd(IndexType(1), ns, T_jw, work(_(1,jw-1)), work(_(jw+1,lWork)));
428 }
429 //
430 // ==== Copy updated reduced window into place ====
431 //
432 if (kwTop>1) {
433 H(kwTop,kwTop-1) = s*V(1,1);
434 }
435 H_kw.upper() = T_jw.upper();
436 H_kw.diag(-1) = T_jw.diag(-1);
437 //
438 // ==== Accumulate orthogonal matrix in order update
439 // . H and Z, if requested. ====
440 //
441 if (ns>1 && s!=Zero) {
442 ormhr(Right, NoTrans,
443 IndexType(1), ns,
444 T(_(1,ns),_(1,ns)),
445 work(_(1,ns-1)),
446 V(_(1,jw),_(1,ns)),
447 work(_(jw+1,lWork)));
448 }
449 //
450 // ==== Update vertical slab in H ====
451 //
452 const IndexType lTop = (wantT) ? 1 : kTop;
453
454 for (IndexType kRow=lTop; kRow<=kwTop-1; kRow+=nv) {
455 const IndexType kLn = min(nv,kwTop-kRow);
456 blas::mm(NoTrans, NoTrans, One,
457 H(_(kRow,kRow+kLn-1),_(kwTop,kBot)),
458 V(_(1,jw),_(1,jw)),
459 Zero,
460 WV(_(1,kLn),_(1,jw)));
461 H(_(kRow,kRow+kLn-1),_(kwTop,kBot)) = WV(_(1,kLn),_(1,jw));
462 }
463 //
464 // ==== Update horizontal slab in H ====
465 //
466 if (wantT) {
467 for (IndexType kCol=kBot+1; kCol<=n; kCol+=nh) {
468 const IndexType kLn = min(nh,n-kCol+1);
469 blas::mm(ConjTrans, NoTrans, One,
470 V(_(1,jw),_(1,jw)),
471 H(_(kwTop,kBot),_(kCol,kCol+kLn-1)),
472 Zero,
473 T(_(1,jw),_(1,kLn)));
474 H(_(kwTop,kBot),_(kCol,kCol+kLn-1)) = T(_(1,jw),_(1,kLn));
475 }
476 }
477 //
478 // ==== Update vertical slab in Z ====
479 //
480 if (wantZ) {
481 for (IndexType kRow=iLoZ; kRow<=iHiZ; kRow+=nv) {
482 const IndexType kLn = min(nv,iHiZ-kRow+1);
483 blas::mm(NoTrans, NoTrans, One,
484 Z(_(kRow,kRow+kLn-1),_(kwTop,kBot)),
485 V(_(1,jw),_(1,jw)),
486 Zero,
487 WV(_(1,kLn),_(1,jw)));
488 Z(_(kRow,kRow+kLn-1), _(kwTop,kBot)) = WV(_(1,kLn),_(1,jw));
489 }
490 }
491 }
492 //
493 // ==== Return the number of deflations ... ====
494 //
495 nd = jw - ns;
496 //
497 // ==== ... and the number of shifts. (Subtracting
498 // . INFQR from the spike length takes care
499 // . of the case of a rare QR failure while
500 // . calculating eigenvalues of the deflation
501 // . window.) ====
502 //
503 ns = ns - infoQR;
504 //
505 // ==== Return optimal workspace. ====
506 //
507 work(1) = lWorkOpt;
508 }
509
510 //== interface for native lapack ===============================================
511
512 #ifdef CHECK_CXXLAPACK
513
514 template <typename IndexType, typename MT>
515 IndexType
516 laqr2_native_wsq(IndexType kTop,
517 IndexType kBot,
518 IndexType nw,
519 const GeMatrix<MT> &T)
520 {
521 typedef typename GeMatrix<MT>::ElementType ElementType;
522
523 const LOGICAL WANTT = false;
524 const LOGICAL WANTZ = false;
525 const INTEGER N = 1;
526 const INTEGER KTOP = kTop;
527 const INTEGER KBOT = kBot;
528 const INTEGER NW = nw;
529 const INTEGER LDH = 1;
530 const INTEGER ILOZ = 0;
531 const INTEGER IHIZ = 0;
532 const INTEGER LDZ = 1;
533 INTEGER NS;
534 INTEGER ND;
535 const INTEGER LDV = nw;
536 const INTEGER NH = T.numCols();
537 const INTEGER LDT = T.leadingDimension();
538 const INTEGER NV = nw;
539 const INTEGER LDWV = nw;
540 ElementType WORK;
541 const INTEGER LWORK = -1;
542 ElementType DUMMY;
543
544 if (IsSame<ElementType,DOUBLE>::value) {
545 LAPACK_IMPL(dlaqr2)(&WANTT,
546 &WANTZ,
547 &N,
548 &KTOP,
549 &KBOT,
550 &NW,
551 &DUMMY,
552 &LDH,
553 &ILOZ,
554 &IHIZ,
555 &DUMMY,
556 &LDZ,
557 &NS,
558 &ND,
559 &DUMMY,
560 &DUMMY,
561 &DUMMY,
562 &LDV,
563 &NH,
564 &DUMMY,
565 &LDT,
566 &NV,
567 &DUMMY,
568 &LDWV,
569 &WORK,
570 &LWORK);
571 } else {
572 ASSERT(0);
573 }
574 return WORK;
575 }
576
577 template <typename IndexType, typename MH, typename MZ, typename VSR,
578 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
579 void
580 laqr2_native(bool wantT,
581 bool wantZ,
582 IndexType kTop,
583 IndexType kBot,
584 IndexType nw,
585 GeMatrix<MH> &H,
586 IndexType iLoZ,
587 IndexType iHiZ,
588 GeMatrix<MZ> &Z,
589 IndexType &ns,
590 IndexType &nd,
591 DenseVector<VSR> &sr,
592 DenseVector<VSI> &si,
593 GeMatrix<MV> &V,
594 GeMatrix<MT> &T,
595 GeMatrix<MWV> &WV,
596 DenseVector<VWORK> &work)
597 {
598 typedef typename GeMatrix<MH>::ElementType ElementType;
599
600 const LOGICAL WANTT = wantT;
601 const LOGICAL WANTZ = wantZ;
602 const INTEGER N = H.numRows();
603 const INTEGER KTOP = kTop;
604 const INTEGER KBOT = kBot;
605 const INTEGER NW = nw;
606 const INTEGER LDH = H.leadingDimension();
607 const INTEGER ILOZ = iLoZ;
608 const INTEGER IHIZ = iHiZ;
609 const INTEGER LDZ = Z.leadingDimension();
610 INTEGER NS = ns;
611 INTEGER ND = nd;
612 const INTEGER LDV = V.leadingDimension();
613 const INTEGER NH = T.numCols();
614 const INTEGER LDT = T.leadingDimension();
615 const INTEGER NV = WV.numRows();
616 const INTEGER LDWV = WV.leadingDimension();
617 const INTEGER LWORK = work.length();
618
619 if (IsSame<ElementType,DOUBLE>::value) {
620 LAPACK_IMPL(dlaqr2)(&WANTT,
621 &WANTZ,
622 &N,
623 &KTOP,
624 &KBOT,
625 &NW,
626 H.data(),
627 &LDH,
628 &ILOZ,
629 &IHIZ,
630 Z.data(),
631 &LDZ,
632 &NS,
633 &ND,
634 sr.data(),
635 si.data(),
636 V.data(),
637 &LDV,
638 &NH,
639 T.data(),
640 &LDT,
641 &NV,
642 WV.data(),
643 &LDWV,
644 work.data(),
645 &LWORK);
646 } else {
647 ASSERT(0);
648 }
649 ns = NS;
650 nd = ND;
651 }
652
653 #endif // CHECK_CXXLAPACK
654
655 //== public interface ==========================================================
656 template <typename IndexType, typename MT>
657 IndexType
658 laqr2_wsq(IndexType kTop,
659 IndexType kBot,
660 IndexType nw,
661 const GeMatrix<MT> &T)
662 {
663 using std::max;
664 //
665 // Test the input parameters
666 //
667 # ifndef NDEBUG
668 ASSERT(T.firstRow()==1);
669 ASSERT(T.firstCol()==1);
670 ASSERT(T.numRows()==T.numCols());
671 # endif
672
673 //
674 // Call implementation
675 //
676 IndexType info = laqr2_generic_wsq(kTop, kBot, nw, T);
677
678 # ifdef CHECK_CXXLAPACK
679 //
680 // Compare results
681 //
682 IndexType _info = laqr2_native_wsq(kTop, kBot, nw, T);
683
684 if (info!=_info) {
685 std::cerr << "CXXLAPACK: info = " << info << std::endl;
686 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
687 ASSERT(0);
688 }
689 # endif
690 return info;
691
692 }
693
694 template <typename IndexType, typename MH, typename MZ, typename VSR,
695 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
696 void
697 laqr2(bool wantT,
698 bool wantZ,
699 IndexType kTop,
700 IndexType kBot,
701 IndexType nw,
702 GeMatrix<MH> &H,
703 IndexType iLoZ,
704 IndexType iHiZ,
705 GeMatrix<MZ> &Z,
706 IndexType &ns,
707 IndexType &nd,
708 DenseVector<VSR> &sr,
709 DenseVector<VSI> &si,
710 GeMatrix<MV> &V,
711 GeMatrix<MT> &T,
712 GeMatrix<MWV> &WV,
713 DenseVector<VWORK> &work)
714 {
715 LAPACK_DEBUG_OUT("laqr2");
716
717 using std::max;
718 //
719 // Test the input parameters
720 //
721 # ifndef NDEBUG
722 ASSERT(H.firstRow()==1);
723 ASSERT(H.firstCol()==1);
724 ASSERT(H.numRows()==H.numCols());
725
726 const IndexType n = H.numRows();
727
728 if (wantZ) {
729 ASSERT(1<=iLoZ);
730 ASSERT(iLoZ<=iHiZ);
731 ASSERT(iHiZ<=n);
732
733 ASSERT(Z.firstRow()==1);
734 ASSERT(Z.firstCol()==1);
735 ASSERT(Z.numRows()==n);
736 ASSERT(Z.numCols()==n);
737 }
738
739 ASSERT(sr.length()==kBot);
740 ASSERT(si.length()==kBot);
741
742 ASSERT(V.firstRow()==1);
743 ASSERT(V.firstCol()==1);
744 ASSERT(V.numRows()==nw);
745 ASSERT(V.numCols()==nw);
746
747 const IndexType nh = T.numCols();
748 ASSERT(nh>=nw);
749
750 ASSERT(T.firstRow()==1);
751 ASSERT(T.firstCol()==1);
752
753 const IndexType nv = WV.numRows();
754 ASSERT(nv>=nw);
755
756 ASSERT((work.length()==0) || (work.length()>=n));
757 # endif
758
759 //
760 // Make copies of output arguments
761 //
762 # ifdef CHECK_CXXLAPACK
763 typename GeMatrix<MH>::NoView H_org = H;
764 typename GeMatrix<MZ>::NoView Z_org = Z;
765 IndexType ns_org = ns;
766 IndexType nd_org = nd;
767 typename DenseVector<VSR>::NoView sr_org = sr;
768 typename DenseVector<VSI>::NoView si_org = si;
769 typename GeMatrix<MV>::NoView V_org = V;
770 typename GeMatrix<MT>::NoView T_org = T;
771 typename GeMatrix<MWV>::NoView WV_org = WV;
772 typename DenseVector<VWORK>::NoView work_org = work;
773 # endif
774
775 //
776 // Call implementation
777 //
778 laqr2_generic(wantT, wantZ, kTop, kBot, nw, H,
779 iLoZ, iHiZ, Z, ns, nd, sr, si,
780 V, T, WV, work);
781
782 # ifdef CHECK_CXXLAPACK
783 //
784 // Make copies of results computed by the generic implementation
785 //
786 typename GeMatrix<MH>::NoView H_generic = H;
787 typename GeMatrix<MZ>::NoView Z_generic = Z;
788 IndexType ns_generic = ns;
789 IndexType nd_generic = nd;
790 typename DenseVector<VSR>::NoView sr_generic = sr;
791 typename DenseVector<VSI>::NoView si_generic = si;
792 typename GeMatrix<MV>::NoView V_generic = V;
793 typename GeMatrix<MT>::NoView T_generic = T;
794 typename GeMatrix<MWV>::NoView WV_generic = WV;
795 typename DenseVector<VWORK>::NoView work_generic = work;
796
797 //
798 // restore output arguments
799 //
800 H = H_org;
801 Z = Z_org;
802 ns = ns_org;
803 nd = nd_org;
804 sr = sr_org;
805 si = si_org;
806 V = V_org;
807 T = T_org;
808 WV = WV_org;
809 work = work_org;
810
811 //
812 // Compare results
813 //
814 laqr2_native(wantT, wantZ, kTop, kBot, nw, H,
815 iLoZ, iHiZ, Z, ns, nd, sr, si,
816 V, T, WV, work);
817
818 bool failed = false;
819 if (! isIdentical(H_generic, H, " H_generic", "H")) {
820 std::cerr << "CXXLAPACK: H_generic = " << H_generic << std::endl;
821 std::cerr << "F77LAPACK: H = " << H << std::endl;
822 std::cerr << "Original: H_org = " << H_org << std::endl;
823 failed = true;
824 }
825
826 if (! isIdentical(Z_generic, Z, "Z_generic", "Z")) {
827 std::cerr << "CXXLAPACK: Z_generic = " << Z_generic << std::endl;
828 std::cerr << "F77LAPACK: Z = " << Z << std::endl;
829 failed = true;
830 }
831
832 if (! isIdentical(ns_generic, ns, "ns_generic", "ns")) {
833 std::cerr << "CXXLAPACK: ns_generic = " << ns_generic << std::endl;
834 std::cerr << "F77LAPACK: ns = " << ns << std::endl;
835 failed = true;
836 }
837
838 if (! isIdentical(nd_generic, nd, "nd_generic", "nd")) {
839 std::cerr << "CXXLAPACK: nd_generic = " << nd_generic << std::endl;
840 std::cerr << "F77LAPACK: nd = " << nd << std::endl;
841 failed = true;
842 }
843
844 if (! isIdentical(sr_generic, sr, "sr_generic", "_sr")) {
845 std::cerr << "CXXLAPACK: sr_generic = " << sr_generic << std::endl;
846 std::cerr << "F77LAPACK: sr = " << sr << std::endl;
847 failed = true;
848 }
849
850 if (! isIdentical(si_generic, si, "si_generic", "si")) {
851 std::cerr << "CXXLAPACK: si_generic = " << si_generic << std::endl;
852 std::cerr << "F77LAPACK: si = " << si << std::endl;
853 failed = true;
854 }
855
856 if (! isIdentical(V_generic, V, "V_generic", "V")) {
857 std::cerr << "CXXLAPACK: V_generic = " << V_generic << std::endl;
858 std::cerr << "F77LAPACK: V = " << V << std::endl;
859 failed = true;
860 }
861
862 if (! isIdentical(T_generic, T, "T_generic", "T")) {
863 std::cerr << "CXXLAPACK: T_generic = " << T_generic << std::endl;
864 std::cerr << "F77LAPACK: T = " << T << std::endl;
865 failed = true;
866 }
867
868 if (! isIdentical(WV_generic, WV, "WV_generic", "_WV")) {
869 std::cerr << "CXXLAPACK: WV_generic = " << WV_generic << std::endl;
870 std::cerr << "F77LAPACK: WV = " << WV << std::endl;
871 failed = true;
872 }
873
874 if (! isIdentical(work_generic, work, "work_generic", "_work")) {
875 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
876 std::cerr << "F77LAPACK: work = " << work << std::endl;
877 failed = true;
878 }
879
880 if (failed) {
881 std::cerr << "error in: laqr2.tcc" << std::endl;
882 ASSERT(0);
883 } else {
884 // std::cerr << "passed: laqr2.tcc" << std::endl;
885 }
886 # endif
887 }
888
889 //-- forwarding ----------------------------------------------------------------
890 template <typename IndexType, typename MT>
891 IndexType
892 laqr2_wsq(IndexType kTop,
893 IndexType kBot,
894 IndexType nw,
895 const MT &&T)
896 {
897 CHECKPOINT_ENTER;
898 const IndexType info = laqr2_wsq(kTop, kBot, nw, T);
899 CHECKPOINT_LEAVE;
900
901 return info;
902 }
903
904 template <typename IndexType, typename MH, typename MZ, typename VSR,
905 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
906 void
907 laqr2(bool wantT,
908 bool wantZ,
909 IndexType kTop,
910 IndexType kBot,
911 IndexType nw,
912 MH &&H,
913 IndexType iLoZ,
914 IndexType iHiZ,
915 MZ &&Z,
916 IndexType &ns,
917 IndexType &nd,
918 VSR &&sr,
919 VSI &&si,
920 MV &&V,
921 MT &&T,
922 MWV &&WV,
923 VWORK &&work)
924 {
925 CHECKPOINT_ENTER;
926 laqr2(wantT, wantZ, kTop, kBot, nw, H, iLoZ, iHiZ, Z, ns, nd, sr, si,
927 V, T, WV, work);
928 CHECKPOINT_LEAVE;
929 }
930
931 } } // namespace lapack, flens
932
933 #endif // FLENS_LAPACK_EIG_LAQR3_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DLAQR2( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
36 $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
37 $ LDT, NV, WV, LDWV, WORK, LWORK )
38 *
39 * -- LAPACK auxiliary routine (version 3.2.2) --
40 * Univ. of Tennessee, Univ. of California Berkeley,
41 * Univ. of Colorado Denver and NAG Ltd..
42 * -- June 2010 --
43 *
44 */
45
46 #ifndef FLENS_LAPACK_EIG_LAQR2_TCC
47 #define FLENS_LAPACK_EIG_LAQR2_TCC 1
48
49 #include <flens/blas/blas.h>
50 #include <flens/lapack/lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename IndexType, typename MT>
57 IndexType
58 laqr2_generic_wsq(IndexType kTop,
59 IndexType kBot,
60 IndexType nw,
61 const GeMatrix<MT> &T)
62 {
63 using std::max;
64 using std::min;
65
66 typedef typename GeMatrix<MT>::ElementType ElementType;
67
68 const Underscore<IndexType> _;
69
70 //
71 // ==== Estimate optimal workspace. ====
72 //
73 IndexType jw = min(nw, kBot-kTop+1);
74 auto _T = T(_(1,jw),_(1,jw));
75
76 IndexType lWorkOpt;
77 if (jw<=2) {
78 lWorkOpt = 1;
79 } else {
80 //
81 // ==== Workspace query call to DGEHRD ====
82 //
83 IndexType lWork1 = hrd_wsq(IndexType(1), jw-1, _T);
84 //
85 // ==== Workspace query call to DORMHR ====
86 //
87 IndexType lWork2 = ormhr_wsq(Right, NoTrans, IndexType(1), jw-1, _T);
88 //
89 // ==== Optimal workspace ====
90 //
91 lWorkOpt = jw+max(lWork1,lWork2);
92 }
93 return lWorkOpt;
94 }
95
96 template <typename IndexType, typename MH, typename MZ, typename VSR,
97 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
98 void
99 laqr2_generic(bool wantT,
100 bool wantZ,
101 IndexType kTop,
102 IndexType kBot,
103 IndexType nw,
104 GeMatrix<MH> &H,
105 IndexType iLoZ,
106 IndexType iHiZ,
107 GeMatrix<MZ> &Z,
108 IndexType &ns,
109 IndexType &nd,
110 DenseVector<VSR> &sr,
111 DenseVector<VSI> &si,
112 GeMatrix<MV> &V,
113 GeMatrix<MT> &T,
114 GeMatrix<MWV> &WV,
115 DenseVector<VWORK> &work)
116 {
117 using std::abs;
118 using std::max;
119 using std::min;
120
121 typedef typename GeMatrix<MH>::ElementType ElementType;
122
123 const Underscore<IndexType> _;
124
125 const ElementType Zero(0), One(1);
126 const IndexType n = H.numRows();
127 const IndexType nh = T.numCols();
128 const IndexType nv = WV.numRows();
129 //
130 // ==== Estimate optimal workspace. ====
131 //
132 IndexType jw = min(nw, kBot-kTop+1);
133 auto T_jw = T(_(1,jw),_(1,jw));
134 auto V_jw = V(_(1,jw),_(1,jw));
135
136 IndexType lWorkOpt;
137 if (jw<=2) {
138 lWorkOpt = 1;
139 } else {
140 //
141 // ==== Workspace query call to DGEHRD ====
142 //
143 IndexType lWork1 = hrd_wsq(IndexType(1), jw-1, T_jw);
144 //
145 // ==== Workspace query call to DORMHR ====
146 //
147 IndexType lWork2 = ormhr_wsq(Right, NoTrans, IndexType(1), jw-1, V_jw);
148 //
149 // ==== Optimal workspace ====
150 //
151 lWorkOpt = jw+max(lWork1,lWork2);
152 }
153 //
154 // ==== Apply worksize query ====
155 //
156 if (work.length()==0) {
157 work.resize(lWorkOpt);
158 }
159 const IndexType lWork = work.length();
160 //
161 // ==== Nothing to do ...
162 // ... for an empty active block ... ====
163 ns = 0;
164 nd = 0;
165 work(1) = One;
166 if (kTop>kBot) {
167 return;
168 }
169 // ... nor for an empty deflation window. ====
170 if (nw<1) {
171 return;
172 }
173 //
174 // ==== Machine constants ====
175 //
176 ElementType safeMin = lamch<ElementType>(SafeMin);
177 ElementType safeMax = One / safeMin;
178 labad(safeMin, safeMax);
179 const ElementType ulp = lamch<ElementType>(Precision);
180 const ElementType smallNum = safeMin*(ElementType(n)/ulp);
181 //
182 // ==== Setup deflation window ====
183 //
184 IndexType kwTop = kBot - jw + 1;
185 auto sr_kw = sr(_(kwTop,kBot));
186 auto si_kw = si(_(kwTop,kBot));
187 auto H_kw = H(_(kwTop,kBot),_(kwTop,kBot));
188 ElementType s;
189
190 if( kwTop==kTop) {
191 s = Zero;
192 } else {
193 s = H(kwTop, kwTop-1);
194 }
195
196 if (kBot==kwTop) {
197 //
198 // ==== 1-by-1 deflation window: not much to do ====
199 //
200 sr(kwTop) = H(kwTop,kwTop);
201 si(kwTop) = Zero;
202 ns = 1;
203 nd = 0;
204 if (abs(s)<=max(smallNum,ulp*abs(H(kwTop,kwTop)))) {
205 ns = 0;
206 nd = 1;
207 if (kwTop>kTop) {
208 H(kwTop, kwTop-1) = Zero;
209 }
210 }
211 work(1) = One;
212 return;
213 }
214 //
215 // ==== Convert to spike-triangular form. (In case of a
216 // . rare QR failure, this routine continues to do
217 // . aggressive early deflation using that part of
218 // . the deflation window that converged using INFQR
219 // . here and there to keep track.) ====
220 //
221 T_jw.upper() = H_kw.upper();
222 T_jw.diag(-1) = H_kw.diag(-1);
223
224 V_jw = Zero;
225 V_jw.diag(0) = One;
226
227 IndexType infoQR;
228 infoQR = lahqr(true, true,
229 IndexType(1), jw, T_jw,
230 sr_kw, si_kw,
231 IndexType(1), jw, V_jw);
232 //
233 // ==== DTREXC needs a clean margin near the diagonal ====
234 //
235 for (IndexType j=1; j<=jw-3; ++j) {
236 T(j+2, j) = Zero;
237 T(j+3, j) = Zero;
238 }
239 if (jw>2) {
240 T(jw,jw-2) = Zero;
241 }
242 //
243 // ==== Deflation detection loop ====
244 //
245 ns = jw;
246 IndexType iFirst;
247 IndexType iLast = infoQR + 1;
248 while (iLast<=ns) {
249 bool bulge;
250 if (ns==1) {
251 bulge = false;
252 } else {
253 bulge = T(ns,ns-1)!=Zero;
254 }
255 //
256 // ==== Small spike tip test for deflation ====
257 //
258 if (!bulge) {
259 //
260 // ==== Real eigenvalue ====
261 //
262 ElementType foo = abs(T(ns,ns));
263 if (foo==Zero) {
264 foo = abs(s);
265 }
266 if (abs(s*V(1,ns))<=max(smallNum,ulp*foo)) {
267 //
268 // ==== Deflatable ====
269 //
270 ns = ns - 1;
271 } else {
272 //
273 // ==== Undeflatable. Move it up out of the way.
274 // . (DTREXC can not fail in this case.) ====
275 //
276 iFirst = ns;
277 trexc(true, T_jw, V_jw, iFirst, iLast, work(_(1,jw)));
278 ++iLast;
279 }
280 } else {
281 //
282 // ==== Complex conjugate pair ====
283 //
284 ElementType foo = abs(T(ns,ns))
285 + sqrt(abs(T(ns,ns-1)))*sqrt(abs(T(ns-1,ns)));
286 if (foo==Zero) {
287 foo = abs(s);
288 }
289 if (max(abs(s*V(1,ns)), abs(s*V(1,ns-1)))<=max(smallNum,ulp*foo)) {
290 //
291 // ==== Deflatable ====
292 //
293 ns -= 2;
294 } else {
295 //
296 // ==== Undeflatable. Move them up out of the way.
297 // . Fortunately, DTREXC does the right thing with
298 // . ILST in case of a rare exchange failure. ====
299 //
300 iFirst = ns;
301 trexc(true, T_jw, V_jw, iFirst, iLast, work(_(1,jw)));
302 iLast += 2;
303 }
304 }
305 //
306 // ==== End deflation detection loop ====
307 //
308 }
309 //
310 // ==== Return to Hessenberg form ====
311 //
312 if (ns==0) {
313 s = Zero;
314 }
315
316 if (ns<jw) {
317 //
318 // ==== sorting diagonal blocks of T improves accuracy for
319 // . graded matrices. Bubble sort deals well with
320 // . exchange failures. ====
321 //
322 bool sorted = false;
323 IndexType i = ns + 1;
324 while (!sorted) {
325 sorted = true;
326
327 IndexType kEnd = i - 1;
328 i = infoQR + 1;
329
330 IndexType k;
331 if (i==ns) {
332 k = i + 1;
333 } else if (T(i+1,i)==Zero) {
334 k = i + 1;
335 } else {
336 k = i + 2;
337 }
338 while (k<=kEnd) {
339 ElementType evi, evk;
340
341 if (k==i+1) {
342 evi = abs(T(i,i));
343 } else {
344 evi = abs(T(i,i)) + sqrt(abs(T(i+1,i)))*sqrt(abs(T(i,i+1)));
345 }
346
347 if (k==kEnd) {
348 evk = abs(T(k,k));
349 } else if(T(k+1,k)==Zero) {
350 evk = abs(T(k,k));
351 } else {
352 evk = abs(T(k,k)) + sqrt(abs(T(k+1,k)))*sqrt(abs(T(k,k+1)));
353 }
354
355 if (evi>=evk) {
356 i = k;
357 } else {
358 sorted = false;
359 iFirst = i;
360 iLast = k;
361 IndexType info = trexc(true, T_jw, V_jw, iFirst, iLast,
362 work(_(1,jw)));
363 if (info==0) {
364 i = iLast;
365 } else {
366 i = k;
367 }
368 }
369 if (i==kEnd) {
370 k = i + 1;
371 } else if (T(i+1,i)==Zero) {
372 k = i + 1;
373 } else {
374 k = i + 2;
375 }
376 }
377 }
378 }
379 //
380 // ==== Restore shift/eigenvalue array from T ====
381 //
382 IndexType i = jw;
383 while (i>=infoQR+1) {
384 if (i==infoQR+1) {
385 sr(kwTop+i-1) = T(i,i);
386 si(kwTop+i-1) = Zero;
387 i = i - 1;
388 } else if (T(i,i-1)==Zero) {
389 sr(kwTop+i-1 ) = T(i,i);
390 si(kwTop+i-1 ) = Zero;
391 i = i - 1;
392 } else {
393 ElementType aa = T(i-1,i-1);
394 ElementType cc = T(i, i-1);
395 ElementType bb = T(i-1,i );
396 ElementType dd = T(i, i );
397 ElementType cs, sn;
398 lanv2(aa, bb, cc, dd,
399 sr(kwTop+i-2), si(kwTop+i-2),
400 sr(kwTop+i-1), si(kwTop+i-1),
401 cs, sn);
402 i -= 2;
403 }
404 }
405
406 if (ns<jw || s==Zero) {
407
408 if (ns>1 && s!=Zero) {
409 //
410 // ==== Reflect spike back into lower triangle ====
411 //
412 work(_(1,ns)) = V(1,_(1,ns));
413 ElementType beta = work(1);
414 ElementType tau;
415
416 larfg(ns, beta, work(_(2,ns)), tau);
417 work(1) = One;
418
419 T(_(3,jw),_(1,jw-2)).lower() = Zero;
420
421 const auto _v = work(_(1,ns));
422
423 larf(Left, _v, tau, T(_(1,ns),_(1,jw)), work(_(jw+1,jw+jw)));
424 larf(Right, _v, tau, T(_(1,ns),_(1,ns)), work(_(jw+1,jw+ns)));
425 larf(Right, _v, tau, V(_(1,jw),_(1,ns)), work(_(jw+1,jw+jw)));
426
427 hrd(IndexType(1), ns, T_jw, work(_(1,jw-1)), work(_(jw+1,lWork)));
428 }
429 //
430 // ==== Copy updated reduced window into place ====
431 //
432 if (kwTop>1) {
433 H(kwTop,kwTop-1) = s*V(1,1);
434 }
435 H_kw.upper() = T_jw.upper();
436 H_kw.diag(-1) = T_jw.diag(-1);
437 //
438 // ==== Accumulate orthogonal matrix in order update
439 // . H and Z, if requested. ====
440 //
441 if (ns>1 && s!=Zero) {
442 ormhr(Right, NoTrans,
443 IndexType(1), ns,
444 T(_(1,ns),_(1,ns)),
445 work(_(1,ns-1)),
446 V(_(1,jw),_(1,ns)),
447 work(_(jw+1,lWork)));
448 }
449 //
450 // ==== Update vertical slab in H ====
451 //
452 const IndexType lTop = (wantT) ? 1 : kTop;
453
454 for (IndexType kRow=lTop; kRow<=kwTop-1; kRow+=nv) {
455 const IndexType kLn = min(nv,kwTop-kRow);
456 blas::mm(NoTrans, NoTrans, One,
457 H(_(kRow,kRow+kLn-1),_(kwTop,kBot)),
458 V(_(1,jw),_(1,jw)),
459 Zero,
460 WV(_(1,kLn),_(1,jw)));
461 H(_(kRow,kRow+kLn-1),_(kwTop,kBot)) = WV(_(1,kLn),_(1,jw));
462 }
463 //
464 // ==== Update horizontal slab in H ====
465 //
466 if (wantT) {
467 for (IndexType kCol=kBot+1; kCol<=n; kCol+=nh) {
468 const IndexType kLn = min(nh,n-kCol+1);
469 blas::mm(ConjTrans, NoTrans, One,
470 V(_(1,jw),_(1,jw)),
471 H(_(kwTop,kBot),_(kCol,kCol+kLn-1)),
472 Zero,
473 T(_(1,jw),_(1,kLn)));
474 H(_(kwTop,kBot),_(kCol,kCol+kLn-1)) = T(_(1,jw),_(1,kLn));
475 }
476 }
477 //
478 // ==== Update vertical slab in Z ====
479 //
480 if (wantZ) {
481 for (IndexType kRow=iLoZ; kRow<=iHiZ; kRow+=nv) {
482 const IndexType kLn = min(nv,iHiZ-kRow+1);
483 blas::mm(NoTrans, NoTrans, One,
484 Z(_(kRow,kRow+kLn-1),_(kwTop,kBot)),
485 V(_(1,jw),_(1,jw)),
486 Zero,
487 WV(_(1,kLn),_(1,jw)));
488 Z(_(kRow,kRow+kLn-1), _(kwTop,kBot)) = WV(_(1,kLn),_(1,jw));
489 }
490 }
491 }
492 //
493 // ==== Return the number of deflations ... ====
494 //
495 nd = jw - ns;
496 //
497 // ==== ... and the number of shifts. (Subtracting
498 // . INFQR from the spike length takes care
499 // . of the case of a rare QR failure while
500 // . calculating eigenvalues of the deflation
501 // . window.) ====
502 //
503 ns = ns - infoQR;
504 //
505 // ==== Return optimal workspace. ====
506 //
507 work(1) = lWorkOpt;
508 }
509
510 //== interface for native lapack ===============================================
511
512 #ifdef CHECK_CXXLAPACK
513
514 template <typename IndexType, typename MT>
515 IndexType
516 laqr2_native_wsq(IndexType kTop,
517 IndexType kBot,
518 IndexType nw,
519 const GeMatrix<MT> &T)
520 {
521 typedef typename GeMatrix<MT>::ElementType ElementType;
522
523 const LOGICAL WANTT = false;
524 const LOGICAL WANTZ = false;
525 const INTEGER N = 1;
526 const INTEGER KTOP = kTop;
527 const INTEGER KBOT = kBot;
528 const INTEGER NW = nw;
529 const INTEGER LDH = 1;
530 const INTEGER ILOZ = 0;
531 const INTEGER IHIZ = 0;
532 const INTEGER LDZ = 1;
533 INTEGER NS;
534 INTEGER ND;
535 const INTEGER LDV = nw;
536 const INTEGER NH = T.numCols();
537 const INTEGER LDT = T.leadingDimension();
538 const INTEGER NV = nw;
539 const INTEGER LDWV = nw;
540 ElementType WORK;
541 const INTEGER LWORK = -1;
542 ElementType DUMMY;
543
544 if (IsSame<ElementType,DOUBLE>::value) {
545 LAPACK_IMPL(dlaqr2)(&WANTT,
546 &WANTZ,
547 &N,
548 &KTOP,
549 &KBOT,
550 &NW,
551 &DUMMY,
552 &LDH,
553 &ILOZ,
554 &IHIZ,
555 &DUMMY,
556 &LDZ,
557 &NS,
558 &ND,
559 &DUMMY,
560 &DUMMY,
561 &DUMMY,
562 &LDV,
563 &NH,
564 &DUMMY,
565 &LDT,
566 &NV,
567 &DUMMY,
568 &LDWV,
569 &WORK,
570 &LWORK);
571 } else {
572 ASSERT(0);
573 }
574 return WORK;
575 }
576
577 template <typename IndexType, typename MH, typename MZ, typename VSR,
578 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
579 void
580 laqr2_native(bool wantT,
581 bool wantZ,
582 IndexType kTop,
583 IndexType kBot,
584 IndexType nw,
585 GeMatrix<MH> &H,
586 IndexType iLoZ,
587 IndexType iHiZ,
588 GeMatrix<MZ> &Z,
589 IndexType &ns,
590 IndexType &nd,
591 DenseVector<VSR> &sr,
592 DenseVector<VSI> &si,
593 GeMatrix<MV> &V,
594 GeMatrix<MT> &T,
595 GeMatrix<MWV> &WV,
596 DenseVector<VWORK> &work)
597 {
598 typedef typename GeMatrix<MH>::ElementType ElementType;
599
600 const LOGICAL WANTT = wantT;
601 const LOGICAL WANTZ = wantZ;
602 const INTEGER N = H.numRows();
603 const INTEGER KTOP = kTop;
604 const INTEGER KBOT = kBot;
605 const INTEGER NW = nw;
606 const INTEGER LDH = H.leadingDimension();
607 const INTEGER ILOZ = iLoZ;
608 const INTEGER IHIZ = iHiZ;
609 const INTEGER LDZ = Z.leadingDimension();
610 INTEGER NS = ns;
611 INTEGER ND = nd;
612 const INTEGER LDV = V.leadingDimension();
613 const INTEGER NH = T.numCols();
614 const INTEGER LDT = T.leadingDimension();
615 const INTEGER NV = WV.numRows();
616 const INTEGER LDWV = WV.leadingDimension();
617 const INTEGER LWORK = work.length();
618
619 if (IsSame<ElementType,DOUBLE>::value) {
620 LAPACK_IMPL(dlaqr2)(&WANTT,
621 &WANTZ,
622 &N,
623 &KTOP,
624 &KBOT,
625 &NW,
626 H.data(),
627 &LDH,
628 &ILOZ,
629 &IHIZ,
630 Z.data(),
631 &LDZ,
632 &NS,
633 &ND,
634 sr.data(),
635 si.data(),
636 V.data(),
637 &LDV,
638 &NH,
639 T.data(),
640 &LDT,
641 &NV,
642 WV.data(),
643 &LDWV,
644 work.data(),
645 &LWORK);
646 } else {
647 ASSERT(0);
648 }
649 ns = NS;
650 nd = ND;
651 }
652
653 #endif // CHECK_CXXLAPACK
654
655 //== public interface ==========================================================
656 template <typename IndexType, typename MT>
657 IndexType
658 laqr2_wsq(IndexType kTop,
659 IndexType kBot,
660 IndexType nw,
661 const GeMatrix<MT> &T)
662 {
663 using std::max;
664 //
665 // Test the input parameters
666 //
667 # ifndef NDEBUG
668 ASSERT(T.firstRow()==1);
669 ASSERT(T.firstCol()==1);
670 ASSERT(T.numRows()==T.numCols());
671 # endif
672
673 //
674 // Call implementation
675 //
676 IndexType info = laqr2_generic_wsq(kTop, kBot, nw, T);
677
678 # ifdef CHECK_CXXLAPACK
679 //
680 // Compare results
681 //
682 IndexType _info = laqr2_native_wsq(kTop, kBot, nw, T);
683
684 if (info!=_info) {
685 std::cerr << "CXXLAPACK: info = " << info << std::endl;
686 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
687 ASSERT(0);
688 }
689 # endif
690 return info;
691
692 }
693
694 template <typename IndexType, typename MH, typename MZ, typename VSR,
695 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
696 void
697 laqr2(bool wantT,
698 bool wantZ,
699 IndexType kTop,
700 IndexType kBot,
701 IndexType nw,
702 GeMatrix<MH> &H,
703 IndexType iLoZ,
704 IndexType iHiZ,
705 GeMatrix<MZ> &Z,
706 IndexType &ns,
707 IndexType &nd,
708 DenseVector<VSR> &sr,
709 DenseVector<VSI> &si,
710 GeMatrix<MV> &V,
711 GeMatrix<MT> &T,
712 GeMatrix<MWV> &WV,
713 DenseVector<VWORK> &work)
714 {
715 LAPACK_DEBUG_OUT("laqr2");
716
717 using std::max;
718 //
719 // Test the input parameters
720 //
721 # ifndef NDEBUG
722 ASSERT(H.firstRow()==1);
723 ASSERT(H.firstCol()==1);
724 ASSERT(H.numRows()==H.numCols());
725
726 const IndexType n = H.numRows();
727
728 if (wantZ) {
729 ASSERT(1<=iLoZ);
730 ASSERT(iLoZ<=iHiZ);
731 ASSERT(iHiZ<=n);
732
733 ASSERT(Z.firstRow()==1);
734 ASSERT(Z.firstCol()==1);
735 ASSERT(Z.numRows()==n);
736 ASSERT(Z.numCols()==n);
737 }
738
739 ASSERT(sr.length()==kBot);
740 ASSERT(si.length()==kBot);
741
742 ASSERT(V.firstRow()==1);
743 ASSERT(V.firstCol()==1);
744 ASSERT(V.numRows()==nw);
745 ASSERT(V.numCols()==nw);
746
747 const IndexType nh = T.numCols();
748 ASSERT(nh>=nw);
749
750 ASSERT(T.firstRow()==1);
751 ASSERT(T.firstCol()==1);
752
753 const IndexType nv = WV.numRows();
754 ASSERT(nv>=nw);
755
756 ASSERT((work.length()==0) || (work.length()>=n));
757 # endif
758
759 //
760 // Make copies of output arguments
761 //
762 # ifdef CHECK_CXXLAPACK
763 typename GeMatrix<MH>::NoView H_org = H;
764 typename GeMatrix<MZ>::NoView Z_org = Z;
765 IndexType ns_org = ns;
766 IndexType nd_org = nd;
767 typename DenseVector<VSR>::NoView sr_org = sr;
768 typename DenseVector<VSI>::NoView si_org = si;
769 typename GeMatrix<MV>::NoView V_org = V;
770 typename GeMatrix<MT>::NoView T_org = T;
771 typename GeMatrix<MWV>::NoView WV_org = WV;
772 typename DenseVector<VWORK>::NoView work_org = work;
773 # endif
774
775 //
776 // Call implementation
777 //
778 laqr2_generic(wantT, wantZ, kTop, kBot, nw, H,
779 iLoZ, iHiZ, Z, ns, nd, sr, si,
780 V, T, WV, work);
781
782 # ifdef CHECK_CXXLAPACK
783 //
784 // Make copies of results computed by the generic implementation
785 //
786 typename GeMatrix<MH>::NoView H_generic = H;
787 typename GeMatrix<MZ>::NoView Z_generic = Z;
788 IndexType ns_generic = ns;
789 IndexType nd_generic = nd;
790 typename DenseVector<VSR>::NoView sr_generic = sr;
791 typename DenseVector<VSI>::NoView si_generic = si;
792 typename GeMatrix<MV>::NoView V_generic = V;
793 typename GeMatrix<MT>::NoView T_generic = T;
794 typename GeMatrix<MWV>::NoView WV_generic = WV;
795 typename DenseVector<VWORK>::NoView work_generic = work;
796
797 //
798 // restore output arguments
799 //
800 H = H_org;
801 Z = Z_org;
802 ns = ns_org;
803 nd = nd_org;
804 sr = sr_org;
805 si = si_org;
806 V = V_org;
807 T = T_org;
808 WV = WV_org;
809 work = work_org;
810
811 //
812 // Compare results
813 //
814 laqr2_native(wantT, wantZ, kTop, kBot, nw, H,
815 iLoZ, iHiZ, Z, ns, nd, sr, si,
816 V, T, WV, work);
817
818 bool failed = false;
819 if (! isIdentical(H_generic, H, " H_generic", "H")) {
820 std::cerr << "CXXLAPACK: H_generic = " << H_generic << std::endl;
821 std::cerr << "F77LAPACK: H = " << H << std::endl;
822 std::cerr << "Original: H_org = " << H_org << std::endl;
823 failed = true;
824 }
825
826 if (! isIdentical(Z_generic, Z, "Z_generic", "Z")) {
827 std::cerr << "CXXLAPACK: Z_generic = " << Z_generic << std::endl;
828 std::cerr << "F77LAPACK: Z = " << Z << std::endl;
829 failed = true;
830 }
831
832 if (! isIdentical(ns_generic, ns, "ns_generic", "ns")) {
833 std::cerr << "CXXLAPACK: ns_generic = " << ns_generic << std::endl;
834 std::cerr << "F77LAPACK: ns = " << ns << std::endl;
835 failed = true;
836 }
837
838 if (! isIdentical(nd_generic, nd, "nd_generic", "nd")) {
839 std::cerr << "CXXLAPACK: nd_generic = " << nd_generic << std::endl;
840 std::cerr << "F77LAPACK: nd = " << nd << std::endl;
841 failed = true;
842 }
843
844 if (! isIdentical(sr_generic, sr, "sr_generic", "_sr")) {
845 std::cerr << "CXXLAPACK: sr_generic = " << sr_generic << std::endl;
846 std::cerr << "F77LAPACK: sr = " << sr << std::endl;
847 failed = true;
848 }
849
850 if (! isIdentical(si_generic, si, "si_generic", "si")) {
851 std::cerr << "CXXLAPACK: si_generic = " << si_generic << std::endl;
852 std::cerr << "F77LAPACK: si = " << si << std::endl;
853 failed = true;
854 }
855
856 if (! isIdentical(V_generic, V, "V_generic", "V")) {
857 std::cerr << "CXXLAPACK: V_generic = " << V_generic << std::endl;
858 std::cerr << "F77LAPACK: V = " << V << std::endl;
859 failed = true;
860 }
861
862 if (! isIdentical(T_generic, T, "T_generic", "T")) {
863 std::cerr << "CXXLAPACK: T_generic = " << T_generic << std::endl;
864 std::cerr << "F77LAPACK: T = " << T << std::endl;
865 failed = true;
866 }
867
868 if (! isIdentical(WV_generic, WV, "WV_generic", "_WV")) {
869 std::cerr << "CXXLAPACK: WV_generic = " << WV_generic << std::endl;
870 std::cerr << "F77LAPACK: WV = " << WV << std::endl;
871 failed = true;
872 }
873
874 if (! isIdentical(work_generic, work, "work_generic", "_work")) {
875 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
876 std::cerr << "F77LAPACK: work = " << work << std::endl;
877 failed = true;
878 }
879
880 if (failed) {
881 std::cerr << "error in: laqr2.tcc" << std::endl;
882 ASSERT(0);
883 } else {
884 // std::cerr << "passed: laqr2.tcc" << std::endl;
885 }
886 # endif
887 }
888
889 //-- forwarding ----------------------------------------------------------------
890 template <typename IndexType, typename MT>
891 IndexType
892 laqr2_wsq(IndexType kTop,
893 IndexType kBot,
894 IndexType nw,
895 const MT &&T)
896 {
897 CHECKPOINT_ENTER;
898 const IndexType info = laqr2_wsq(kTop, kBot, nw, T);
899 CHECKPOINT_LEAVE;
900
901 return info;
902 }
903
904 template <typename IndexType, typename MH, typename MZ, typename VSR,
905 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
906 void
907 laqr2(bool wantT,
908 bool wantZ,
909 IndexType kTop,
910 IndexType kBot,
911 IndexType nw,
912 MH &&H,
913 IndexType iLoZ,
914 IndexType iHiZ,
915 MZ &&Z,
916 IndexType &ns,
917 IndexType &nd,
918 VSR &&sr,
919 VSI &&si,
920 MV &&V,
921 MT &&T,
922 MWV &&WV,
923 VWORK &&work)
924 {
925 CHECKPOINT_ENTER;
926 laqr2(wantT, wantZ, kTop, kBot, nw, H, iLoZ, iHiZ, Z, ns, nd, sr, si,
927 V, T, WV, work);
928 CHECKPOINT_LEAVE;
929 }
930
931 } } // namespace lapack, flens
932
933 #endif // FLENS_LAPACK_EIG_LAQR3_TCC