1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
36 $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
37 $ LDT, NV, WV, LDWV, WORK, LWORK )
38 *
39 * -- LAPACK auxiliary routine (version 3.2.2) --
40 * Univ. of Tennessee, Univ. of California Berkeley,
41 * Univ. of Colorado Denver and NAG Ltd..
42 * -- June 2010 --
43 *
44 */
45
46 #ifndef FLENS_LAPACK_EIG_LAQR3_TCC
47 #define FLENS_LAPACK_EIG_LAQR3_TCC 1
48
49 #include <flens/blas/blas.h>
50 #include <flens/lapack/lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename IndexType, typename MT>
57 IndexType
58 laqr3_generic_wsq(IndexType kTop,
59 IndexType kBot,
60 IndexType nw,
61 const GeMatrix<MT> &T)
62 {
63 using std::max;
64 using std::min;
65
66 typedef typename GeMatrix<MT>::ElementType ElementType;
67
68 const Underscore<IndexType> _;
69
70 //
71 // ==== Estimate optimal workspace. ====
72 //
73 IndexType jw = min(nw, kBot-kTop+1);
74 auto _T = T(_(1,jw),_(1,jw));
75
76 IndexType lWorkOpt;
77 if (jw<=2) {
78 lWorkOpt = 1;
79 } else {
80 //
81 // ==== Workspace query call to DGEHRD ====
82 //
83 IndexType lWork1 = hrd_wsq(IndexType(1), jw-1, _T);
84 //
85 // ==== Workspace query call to DORMHR ====
86 //
87 IndexType lWork2 = ormhr_wsq(Right, NoTrans, IndexType(1), jw-1, _T);
88 //
89 // ==== Workspace query call to DLAQR4 ====
90 //
91 IndexType lWork3 = laqr4_wsq(true, true, IndexType(1), jw, _T);
92 //
93 // ==== Optimal workspace ====
94 //
95 lWorkOpt = max(jw+max(lWork1,lWork2), lWork3);
96 }
97 return lWorkOpt;
98 }
99
100 template <typename IndexType, typename MH, typename MZ, typename VSR,
101 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
102 void
103 laqr3_generic(bool wantT,
104 bool wantZ,
105 IndexType kTop,
106 IndexType kBot,
107 IndexType nw,
108 GeMatrix<MH> &H,
109 IndexType iLoZ,
110 IndexType iHiZ,
111 GeMatrix<MZ> &Z,
112 IndexType &ns,
113 IndexType &nd,
114 DenseVector<VSR> &sr,
115 DenseVector<VSI> &si,
116 GeMatrix<MV> &V,
117 GeMatrix<MT> &T,
118 GeMatrix<MWV> &WV,
119 DenseVector<VWORK> &work)
120 {
121 using std::abs;
122 using std::max;
123 using std::min;
124
125 typedef typename GeMatrix<MH>::ElementType ElementType;
126
127 const Underscore<IndexType> _;
128
129 const ElementType Zero(0), One(1);
130 const IndexType n = H.numRows();
131 const IndexType nh = T.numCols();
132 const IndexType nv = WV.numRows();
133 //
134 // ==== Estimate optimal workspace. ====
135 //
136 IndexType jw = min(nw, kBot-kTop+1);
137 auto T_jw = T(_(1,jw),_(1,jw));
138 auto V_jw = V(_(1,jw),_(1,jw));
139
140 IndexType lWorkOpt;
141 if (jw<=2) {
142 lWorkOpt = 1;
143 } else {
144 //
145 // ==== Workspace query call to DGEHRD ====
146 //
147 IndexType lWork1 = hrd_wsq(IndexType(1), jw-1, T_jw);
148 //
149 // ==== Workspace query call to DORMHR ====
150 //
151 IndexType lWork2 = ormhr_wsq(Right, NoTrans, IndexType(1), jw-1, V_jw);
152 //
153 // ==== Workspace query call to DLAQR4 ====
154 //
155 IndexType lWork3 = laqr4_wsq(true, true, IndexType(1), jw, T_jw);
156 //
157 // ==== Optimal workspace ====
158 //
159 lWorkOpt = max(jw+max(lWork1,lWork2), lWork3);
160 }
161 //
162 // ==== Apply worksize query ====
163 //
164 if (work.length()==0) {
165 work.resize(lWorkOpt);
166 }
167 const IndexType lWork = work.length();
168 //
169 // ==== Nothing to do ...
170 // ... for an empty active block ... ====
171 ns = 0;
172 nd = 0;
173 work(1) = One;
174 if (kTop>kBot) {
175 return;
176 }
177 // ... nor for an empty deflation window. ====
178 if (nw<1) {
179 return;
180 }
181 //
182 // ==== Machine constants ====
183 //
184 ElementType safeMin = lamch<ElementType>(SafeMin);
185 ElementType safeMax = One / safeMin;
186 labad(safeMin, safeMax);
187 const ElementType ulp = lamch<ElementType>(Precision);
188 const ElementType smallNum = safeMin*(ElementType(n)/ulp);
189 //
190 // ==== Setup deflation window ====
191 //
192 IndexType kwTop = kBot - jw + 1;
193 auto sr_kw = sr(_(kwTop,kBot));
194 auto si_kw = si(_(kwTop,kBot));
195 auto H_kw = H(_(kwTop,kBot),_(kwTop,kBot));
196 ElementType s;
197
198 if( kwTop==kTop) {
199 s = Zero;
200 } else {
201 s = H(kwTop, kwTop-1);
202 }
203
204 if (kBot==kwTop) {
205 //
206 // ==== 1-by-1 deflation window: not much to do ====
207 //
208 sr(kwTop) = H(kwTop,kwTop);
209 si(kwTop) = Zero;
210 ns = 1;
211 nd = 0;
212 if (abs(s)<=max(smallNum,ulp*abs(H(kwTop,kwTop)))) {
213 ns = 0;
214 nd = 1;
215 if (kwTop>kTop) {
216 H(kwTop, kwTop-1) = Zero;
217 }
218 }
219 work(1) = One;
220 return;
221 }
222 //
223 // ==== Convert to spike-triangular form. (In case of a
224 // . rare QR failure, this routine continues to do
225 // . aggressive early deflation using that part of
226 // . the deflation window that converged using INFQR
227 // . here and there to keep track.) ====
228 //
229 T_jw.upper() = H_kw.upper();
230 T_jw.diag(-1) = H_kw.diag(-1);
231
232 V_jw = Zero;
233 V_jw.diag(0) = One;
234
235 IndexType nMin = ilaenv<ElementType>(12, "LAQR3", "SV", jw, 1, jw, lWork);
236 IndexType infoQR;
237 if (jw>nMin) {
238 infoQR = laqr4(true, true,
239 IndexType(1), jw, T_jw,
240 sr_kw, si_kw,
241 IndexType(1), jw, V_jw,
242 work);
243 } else {
244 infoQR = lahqr(true, true,
245 IndexType(1), jw, T_jw,
246 sr_kw, si_kw,
247 IndexType(1), jw, V_jw);
248 }
249 //
250 // ==== DTREXC needs a clean margin near the diagonal ====
251 //
252 for (IndexType j=1; j<=jw-3; ++j) {
253 T(j+2, j) = Zero;
254 T(j+3, j) = Zero;
255 }
256 if (jw>2) {
257 T(jw,jw-2) = Zero;
258 }
259 //
260 // ==== Deflation detection loop ====
261 //
262 ns = jw;
263 IndexType iFirst;
264 IndexType iLast = infoQR + 1;
265 while (iLast<=ns) {
266 bool bulge;
267 if (ns==1) {
268 bulge = false;
269 } else {
270 bulge = T(ns,ns-1)!=Zero;
271 }
272 //
273 // ==== Small spike tip test for deflation ====
274 //
275 if (!bulge) {
276 //
277 // ==== Real eigenvalue ====
278 //
279 ElementType foo = abs(T(ns,ns));
280 if (foo==Zero) {
281 foo = abs(s);
282 }
283 if (abs(s*V(1,ns))<=max(smallNum,ulp*foo)) {
284 //
285 // ==== Deflatable ====
286 //
287 ns = ns - 1;
288 } else {
289 //
290 // ==== Undeflatable. Move it up out of the way.
291 // . (DTREXC can not fail in this case.) ====
292 //
293 iFirst = ns;
294 trexc(true, T_jw, V_jw, iFirst, iLast, work(_(1,jw)));
295 ++iLast;
296 }
297 } else {
298 //
299 // ==== Complex conjugate pair ====
300 //
301 ElementType foo = abs(T(ns,ns))
302 + sqrt(abs(T(ns,ns-1)))*sqrt(abs(T(ns-1,ns)));
303 if (foo==Zero) {
304 foo = abs(s);
305 }
306 if (max(abs(s*V(1,ns)), abs(s*V(1,ns-1)))<=max(smallNum,ulp*foo)) {
307 //
308 // ==== Deflatable ====
309 //
310 ns -= 2;
311 } else {
312 //
313 // ==== Undeflatable. Move them up out of the way.
314 // . Fortunately, DTREXC does the right thing with
315 // . ILST in case of a rare exchange failure. ====
316 //
317 iFirst = ns;
318 trexc(true, T_jw, V_jw, iFirst, iLast, work(_(1,jw)));
319 iLast += 2;
320 }
321 }
322 //
323 // ==== End deflation detection loop ====
324 //
325 }
326 //
327 // ==== Return to Hessenberg form ====
328 //
329 if (ns==0) {
330 s = Zero;
331 }
332
333 if (ns<jw) {
334 //
335 // ==== sorting diagonal blocks of T improves accuracy for
336 // . graded matrices. Bubble sort deals well with
337 // . exchange failures. ====
338 //
339 bool sorted = false;
340 IndexType i = ns + 1;
341 while (!sorted) {
342 sorted = true;
343
344 IndexType kEnd = i - 1;
345 i = infoQR + 1;
346
347 IndexType k;
348 if (i==ns) {
349 k = i + 1;
350 } else if (T(i+1,i)==Zero) {
351 k = i + 1;
352 } else {
353 k = i + 2;
354 }
355 while (k<=kEnd) {
356 ElementType evi, evk;
357
358 if (k==i+1) {
359 evi = abs(T(i,i));
360 } else {
361 evi = abs(T(i,i)) + sqrt(abs(T(i+1,i)))*sqrt(abs(T(i,i+1)));
362 }
363
364 if (k==kEnd) {
365 evk = abs(T(k,k));
366 } else if(T(k+1,k)==Zero) {
367 evk = abs(T(k,k));
368 } else {
369 evk = abs(T(k,k)) + sqrt(abs(T(k+1,k)))*sqrt(abs(T(k,k+1)));
370 }
371
372 if (evi>=evk) {
373 i = k;
374 } else {
375 sorted = false;
376 iFirst = i;
377 iLast = k;
378 IndexType info = trexc(true, T_jw, V_jw, iFirst, iLast,
379 work(_(1,jw)));
380 if (info==0) {
381 i = iLast;
382 } else {
383 i = k;
384 }
385 }
386 if (i==kEnd) {
387 k = i + 1;
388 } else if (T(i+1,i)==Zero) {
389 k = i + 1;
390 } else {
391 k = i + 2;
392 }
393 }
394 }
395 }
396 //
397 // ==== Restore shift/eigenvalue array from T ====
398 //
399 IndexType i = jw;
400 while (i>=infoQR+1) {
401 if (i==infoQR+1) {
402 sr(kwTop+i-1) = T(i,i);
403 si(kwTop+i-1) = Zero;
404 i = i - 1;
405 } else if (T(i,i-1)==Zero) {
406 sr(kwTop+i-1 ) = T(i,i);
407 si(kwTop+i-1 ) = Zero;
408 i = i - 1;
409 } else {
410 ElementType aa = T(i-1,i-1);
411 ElementType cc = T(i, i-1);
412 ElementType bb = T(i-1,i );
413 ElementType dd = T(i, i );
414 ElementType cs, sn;
415 lanv2(aa, bb, cc, dd,
416 sr(kwTop+i-2), si(kwTop+i-2),
417 sr(kwTop+i-1), si(kwTop+i-1),
418 cs, sn);
419 i -= 2;
420 }
421 }
422
423 if (ns<jw || s==Zero) {
424 if (ns>1 && s!=Zero) {
425 //
426 // ==== Reflect spike back into lower triangle ====
427 //
428 work(_(1,ns)) = V(1,_(1,ns));
429 ElementType beta = work(1);
430 ElementType tau;
431
432 larfg(ns, beta, work(_(2,ns)), tau);
433 work(1) = One;
434
435 T(_(3,jw),_(1,jw-2)).lower() = Zero;
436
437 const auto _v = work(_(1,ns));
438
439 larf(Left, _v, tau, T(_(1,ns),_(1,jw)), work(_(jw+1,jw+jw)));
440 larf(Right, _v, tau, T(_(1,ns),_(1,ns)), work(_(jw+1,jw+ns)));
441 larf(Right, _v, tau, V(_(1,jw),_(1,ns)), work(_(jw+1,jw+jw)));
442
443 hrd(IndexType(1), ns, T_jw, work(_(1,jw-1)), work(_(jw+1,lWork)));
444 }
445 //
446 // ==== Copy updated reduced window into place ====
447 //
448 if (kwTop>1) {
449 H(kwTop,kwTop-1) = s*V(1,1);
450 }
451 H_kw.upper() = T_jw.upper();
452 H_kw.diag(-1) = T_jw.diag(-1);
453 //
454 // ==== Accumulate orthogonal matrix in order update
455 // . H and Z, if requested. ====
456 //
457 if (ns>1 && s!=Zero) {
458 ormhr(Right, NoTrans,
459 IndexType(1), ns,
460 T(_(1,ns),_(1,ns)),
461 work(_(1,ns-1)),
462 V(_(1,jw),_(1,ns)),
463 work(_(jw+1,lWork)));
464 }
465 //
466 // ==== Update vertical slab in H ====
467 //
468 const IndexType lTop = (wantT) ? 1 : kTop;
469
470 for (IndexType kRow=lTop; kRow<=kwTop-1; kRow+=nv) {
471 const IndexType kLn = min(nv,kwTop-kRow);
472 blas::mm(NoTrans, NoTrans, One,
473 H(_(kRow,kRow+kLn-1),_(kwTop,kBot)),
474 V(_(1,jw),_(1,jw)),
475 Zero,
476 WV(_(1,kLn),_(1,jw)));
477 H(_(kRow,kRow+kLn-1),_(kwTop,kBot)) = WV(_(1,kLn),_(1,jw));
478 }
479 //
480 // ==== Update horizontal slab in H ====
481 //
482 if (wantT) {
483 for (IndexType kCol=kBot+1; kCol<=n; kCol+=nh) {
484 const IndexType kLn = min(nh,n-kCol+1);
485 blas::mm(ConjTrans, NoTrans, One,
486 V(_(1,jw),_(1,jw)),
487 H(_(kwTop,kBot),_(kCol,kCol+kLn-1)),
488 Zero,
489 T(_(1,jw),_(1,kLn)));
490 H(_(kwTop,kBot),_(kCol,kCol+kLn-1)) = T(_(1,jw),_(1,kLn));
491 }
492 }
493 //
494 // ==== Update vertical slab in Z ====
495 //
496 if (wantZ) {
497 for (IndexType kRow=iLoZ; kRow<=iHiZ; kRow+=nv) {
498 const IndexType kLn = min(nv,iHiZ-kRow+1);
499 blas::mm(NoTrans, NoTrans, One,
500 Z(_(kRow,kRow+kLn-1),_(kwTop,kBot)),
501 V(_(1,jw),_(1,jw)),
502 Zero,
503 WV(_(1,kLn),_(1,jw)));
504 Z(_(kRow,kRow+kLn-1), _(kwTop,kBot)) = WV(_(1,kLn),_(1,jw));
505 }
506 }
507 }
508 //
509 // ==== Return the number of deflations ... ====
510 //
511 nd = jw - ns;
512 //
513 // ==== ... and the number of shifts. (Subtracting
514 // . INFQR from the spike length takes care
515 // . of the case of a rare QR failure while
516 // . calculating eigenvalues of the deflation
517 // . window.) ====
518 //
519 ns = ns - infoQR;
520 //
521 // ==== Return optimal workspace. ====
522 //
523 work(1) = lWorkOpt;
524 }
525
526 //== interface for native lapack ===============================================
527
528 #ifdef CHECK_CXXLAPACK
529
530 template <typename IndexType, typename MT>
531 IndexType
532 laqr3_native_wsq(IndexType kTop,
533 IndexType kBot,
534 IndexType nw,
535 const GeMatrix<MT> &T)
536 {
537 typedef typename GeMatrix<MT>::ElementType ElementType;
538
539 const LOGICAL WANTT = false;
540 const LOGICAL WANTZ = false;
541 const INTEGER N = 1;
542 const INTEGER KTOP = kTop;
543 const INTEGER KBOT = kBot;
544 const INTEGER NW = nw;
545 const INTEGER LDH = 1;
546 const INTEGER ILOZ = 0;
547 const INTEGER IHIZ = 0;
548 const INTEGER LDZ = 1;
549 INTEGER NS;
550 INTEGER ND;
551 const INTEGER LDV = nw;
552 const INTEGER NH = T.numCols();
553 const INTEGER LDT = T.leadingDimension();
554 const INTEGER NV = nw;
555 const INTEGER LDWV = nw;
556 ElementType WORK;
557 const INTEGER LWORK = -1;
558 ElementType DUMMY;
559
560 if (IsSame<ElementType,DOUBLE>::value) {
561 LAPACK_IMPL(dlaqr3)(&WANTT,
562 &WANTZ,
563 &N,
564 &KTOP,
565 &KBOT,
566 &NW,
567 &DUMMY,
568 &LDH,
569 &ILOZ,
570 &IHIZ,
571 &DUMMY,
572 &LDZ,
573 &NS,
574 &ND,
575 &DUMMY,
576 &DUMMY,
577 &DUMMY,
578 &LDV,
579 &NH,
580 &DUMMY,
581 &LDT,
582 &NV,
583 &DUMMY,
584 &LDWV,
585 &WORK,
586 &LWORK);
587 } else {
588 ASSERT(0);
589 }
590 return WORK;
591 }
592
593 template <typename IndexType, typename MH, typename MZ, typename VSR,
594 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
595 void
596 laqr3_native(bool wantT,
597 bool wantZ,
598 IndexType kTop,
599 IndexType kBot,
600 IndexType nw,
601 GeMatrix<MH> &H,
602 IndexType iLoZ,
603 IndexType iHiZ,
604 GeMatrix<MZ> &Z,
605 IndexType &ns,
606 IndexType &nd,
607 DenseVector<VSR> &sr,
608 DenseVector<VSI> &si,
609 GeMatrix<MV> &V,
610 GeMatrix<MT> &T,
611 GeMatrix<MWV> &WV,
612 DenseVector<VWORK> &work)
613 {
614 typedef typename GeMatrix<MH>::ElementType ElementType;
615
616 const LOGICAL WANTT = wantT;
617 const LOGICAL WANTZ = wantZ;
618 const INTEGER N = H.numRows();
619 const INTEGER KTOP = kTop;
620 const INTEGER KBOT = kBot;
621 const INTEGER NW = nw;
622 const INTEGER LDH = H.leadingDimension();
623 const INTEGER ILOZ = iLoZ;
624 const INTEGER IHIZ = iHiZ;
625 const INTEGER LDZ = Z.leadingDimension();
626 INTEGER NS = ns;
627 INTEGER ND = nd;
628 const INTEGER LDV = V.leadingDimension();
629 const INTEGER NH = T.numCols();
630 const INTEGER LDT = T.leadingDimension();
631 const INTEGER NV = WV.numRows();
632 const INTEGER LDWV = WV.leadingDimension();
633 const INTEGER LWORK = work.length();
634
635 if (IsSame<ElementType,DOUBLE>::value) {
636 LAPACK_IMPL(dlaqr3)(&WANTT,
637 &WANTZ,
638 &N,
639 &KTOP,
640 &KBOT,
641 &NW,
642 H.data(),
643 &LDH,
644 &ILOZ,
645 &IHIZ,
646 Z.data(),
647 &LDZ,
648 &NS,
649 &ND,
650 sr.data(),
651 si.data(),
652 V.data(),
653 &LDV,
654 &NH,
655 T.data(),
656 &LDT,
657 &NV,
658 WV.data(),
659 &LDWV,
660 work.data(),
661 &LWORK);
662 } else {
663 ASSERT(0);
664 }
665 ns = NS;
666 nd = ND;
667 }
668
669 #endif // CHECK_CXXLAPACK
670
671 //== public interface ==========================================================
672 template <typename IndexType, typename MT>
673 IndexType
674 laqr3_wsq(IndexType kTop,
675 IndexType kBot,
676 IndexType nw,
677 const GeMatrix<MT> &T)
678 {
679 using std::max;
680 //
681 // Test the input parameters
682 //
683 # ifndef NDEBUG
684 ASSERT(T.firstRow()==1);
685 ASSERT(T.firstCol()==1);
686 ASSERT(T.numRows()==T.numCols());
687 # endif
688
689 //
690 // Call implementation
691 //
692 // TODO: call generic implementation
693 IndexType info = laqr3_generic_wsq(kTop, kBot, nw, T);
694
695 # ifdef CHECK_CXXLAPACK
696 //
697 // Compare results
698 //
699 IndexType _info = laqr3_native_wsq(kTop, kBot, nw, T);
700
701 if (info!=_info) {
702 std::cerr << "CXXLAPACK: info = " << info << std::endl;
703 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
704 ASSERT(0);
705 }
706 # endif
707 return info;
708
709 }
710
711 template <typename IndexType, typename MH, typename MZ, typename VSR,
712 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
713 void
714 laqr3(bool wantT,
715 bool wantZ,
716 IndexType kTop,
717 IndexType kBot,
718 IndexType nw,
719 GeMatrix<MH> &H,
720 IndexType iLoZ,
721 IndexType iHiZ,
722 GeMatrix<MZ> &Z,
723 IndexType &ns,
724 IndexType &nd,
725 DenseVector<VSR> &sr,
726 DenseVector<VSI> &si,
727 GeMatrix<MV> &V,
728 GeMatrix<MT> &T,
729 GeMatrix<MWV> &WV,
730 DenseVector<VWORK> &work)
731 {
732 LAPACK_DEBUG_OUT("laqr3");
733
734 using std::max;
735 //
736 // Test the input parameters
737 //
738 # ifndef NDEBUG
739 ASSERT(H.firstRow()==1);
740 ASSERT(H.firstCol()==1);
741 ASSERT(H.numRows()==H.numCols());
742
743 const IndexType n = H.numRows();
744
745 if (wantZ) {
746 ASSERT(1<=iLoZ);
747 ASSERT(iLoZ<=iHiZ);
748 ASSERT(iHiZ<=n);
749
750 ASSERT(Z.firstRow()==1);
751 ASSERT(Z.firstCol()==1);
752 ASSERT(Z.numRows()==n);
753 ASSERT(Z.numCols()==n);
754 }
755
756 ASSERT(sr.length()==kBot);
757 ASSERT(si.length()==kBot);
758
759 ASSERT(V.firstRow()==1);
760 ASSERT(V.firstCol()==1);
761 ASSERT(V.numRows()==nw);
762 ASSERT(V.numCols()==nw);
763
764 const IndexType nh = T.numCols();
765 ASSERT(nh>=nw);
766
767 ASSERT(T.firstRow()==1);
768 ASSERT(T.firstCol()==1);
769
770 const IndexType nv = WV.numRows();
771 ASSERT(nv>=nw);
772
773 ASSERT((work.length()==0) || (work.length()>=n));
774 # endif
775
776 //
777 // Make copies of output arguments
778 //
779 # ifdef CHECK_CXXLAPACK
780 typename GeMatrix<MH>::NoView H_org = H;
781 typename GeMatrix<MZ>::NoView Z_org = Z;
782 IndexType ns_org = ns;
783 IndexType nd_org = nd;
784 typename DenseVector<VSR>::NoView sr_org = sr;
785 typename DenseVector<VSI>::NoView si_org = si;
786 typename GeMatrix<MV>::NoView V_org = V;
787 typename GeMatrix<MT>::NoView T_org = T;
788 typename GeMatrix<MWV>::NoView WV_org = WV;
789 typename DenseVector<VWORK>::NoView work_org = work;
790 # endif
791
792 //
793 // Call implementation
794 //
795 laqr3_generic(wantT, wantZ, kTop, kBot, nw, H,
796 iLoZ, iHiZ, Z, ns, nd, sr, si,
797 V, T, WV, work);
798
799 # ifdef CHECK_CXXLAPACK
800 //
801 // Make copies of results computed by the generic implementation
802 //
803 typename GeMatrix<MH>::NoView H_generic = H;
804 typename GeMatrix<MZ>::NoView Z_generic = Z;
805 IndexType ns_generic = ns;
806 IndexType nd_generic = nd;
807 typename DenseVector<VSR>::NoView sr_generic = sr;
808 typename DenseVector<VSI>::NoView si_generic = si;
809 typename GeMatrix<MV>::NoView V_generic = V;
810 typename GeMatrix<MT>::NoView T_generic = T;
811 typename GeMatrix<MWV>::NoView WV_generic = WV;
812 typename DenseVector<VWORK>::NoView work_generic = work;
813
814 //
815 // restore output arguments
816 //
817 H = H_org;
818 Z = Z_org;
819 ns = ns_org;
820 nd = nd_org;
821 sr = sr_org;
822 si = si_org;
823 V = V_org;
824 T = T_org;
825 WV = WV_org;
826 work = work_org;
827
828 //
829 // Compare generic results with results from the native implementation
830 //
831 laqr3_native(wantT, wantZ, kTop, kBot, nw, H,
832 iLoZ, iHiZ, Z, ns, nd, sr, si,
833 V, T, WV, work);
834
835 bool failed = false;
836 if (! isIdentical(H_generic, H, "H_generic", "H")) {
837 std::cerr << "CXXLAPACK: H_generic = " << H_generic << std::endl;
838 std::cerr << "F77LAPACK: H = " << H << std::endl;
839 std::cerr << "Original: H_org = " << H_org << std::endl;
840 failed = true;
841 }
842
843 if (! isIdentical(Z_generic, Z, "Z_generic", "Z")) {
844 std::cerr << "CXXLAPACK: Z = " << Z_generic << std::endl;
845 std::cerr << "F77LAPACK: Z = " << Z << std::endl;
846 failed = true;
847 }
848
849 if (! isIdentical(ns_generic, ns, "ns_generic", "ns")) {
850 std::cerr << "CXXLAPACK: ns_generic = " << ns_generic << std::endl;
851 std::cerr << "F77LAPACK: ns = " << ns << std::endl;
852 failed = true;
853 }
854
855 if (! isIdentical(nd_generic, nd, "nd_generic", "nd")) {
856 std::cerr << "CXXLAPACK: nd_generic = " << nd_generic << std::endl;
857 std::cerr << "F77LAPACK: nd = " << nd << std::endl;
858 failed = true;
859 }
860
861 if (! isIdentical(sr_generic, sr, "sr_generic", "sr")) {
862 std::cerr << "CXXLAPACK: sr_generic = " << sr_generic << std::endl;
863 std::cerr << "F77LAPACK: sr = " << sr << std::endl;
864 failed = true;
865 }
866
867 if (! isIdentical(si_generic, si, "si_generic", "si")) {
868 std::cerr << "CXXLAPACK: si_generic = " << si_generic << std::endl;
869 std::cerr << "F77LAPACK: si = " << si << std::endl;
870 failed = true;
871 }
872
873 if (! isIdentical(V_generic, V, " V_generic", "V")) {
874 std::cerr << "CXXLAPACK: V_generic = " << V_generic << std::endl;
875 std::cerr << "F77LAPACK: V = " << V << std::endl;
876 failed = true;
877 }
878
879 if (! isIdentical(T_generic, T, "T_generic", "T")) {
880 std::cerr << "CXXLAPACK: T_generic = " << T_generic << std::endl;
881 std::cerr << "F77LAPACK: T = " << T << std::endl;
882 failed = true;
883 }
884
885 if (! isIdentical(WV_generic, WV, "WV_generic", "WV")) {
886 std::cerr << "CXXLAPACK: WV_generic = " << WV_generic << std::endl;
887 std::cerr << "F77LAPACK: WV = " << WV << std::endl;
888 failed = true;
889 }
890
891 if (! isIdentical(work_generic, work, "work_generic", "work")) {
892 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
893 std::cerr << "F77LAPACK: work = " << work << std::endl;
894 failed = true;
895 }
896
897 if (failed) {
898 std::cerr << "error in: laqr3.tcc" << std::endl;
899 ASSERT(0);
900 } else {
901 // std::cerr << "passed: laqr3.tcc" << std::endl;
902 }
903 # endif
904 }
905
906 //-- forwarding ----------------------------------------------------------------
907 template <typename IndexType, typename MT>
908 IndexType
909 laqr3_wsq(IndexType kTop,
910 IndexType kBot,
911 IndexType nw,
912 const MT &&T)
913 {
914 CHECKPOINT_ENTER;
915 const IndexType info = laqr3_wsq(kTop, kBot, nw, T);
916 CHECKPOINT_LEAVE;
917
918 return info;
919 }
920
921 template <typename IndexType, typename MH, typename MZ, typename VSR,
922 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
923 void
924 laqr3(bool wantT,
925 bool wantZ,
926 IndexType kTop,
927 IndexType kBot,
928 IndexType nw,
929 MH &&H,
930 IndexType iLoZ,
931 IndexType iHiZ,
932 MZ &&Z,
933 IndexType &ns,
934 IndexType &nd,
935 VSR &&sr,
936 VSI &&si,
937 MV &&V,
938 MT &&T,
939 MWV &&WV,
940 VWORK &&work)
941 {
942 CHECKPOINT_ENTER;
943 laqr3(wantT, wantZ, kTop, kBot, nw, H, iLoZ, iHiZ, Z, ns, nd, sr, si,
944 V, T, WV, work);
945 CHECKPOINT_LEAVE;
946 }
947
948 } } // namespace lapack, flens
949
950 #endif // FLENS_LAPACK_EIG_LAQR3_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DLAQR3( WANTT, WANTZ, N, KTOP, KBOT, NW, H, LDH, ILOZ,
36 $ IHIZ, Z, LDZ, NS, ND, SR, SI, V, LDV, NH, T,
37 $ LDT, NV, WV, LDWV, WORK, LWORK )
38 *
39 * -- LAPACK auxiliary routine (version 3.2.2) --
40 * Univ. of Tennessee, Univ. of California Berkeley,
41 * Univ. of Colorado Denver and NAG Ltd..
42 * -- June 2010 --
43 *
44 */
45
46 #ifndef FLENS_LAPACK_EIG_LAQR3_TCC
47 #define FLENS_LAPACK_EIG_LAQR3_TCC 1
48
49 #include <flens/blas/blas.h>
50 #include <flens/lapack/lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename IndexType, typename MT>
57 IndexType
58 laqr3_generic_wsq(IndexType kTop,
59 IndexType kBot,
60 IndexType nw,
61 const GeMatrix<MT> &T)
62 {
63 using std::max;
64 using std::min;
65
66 typedef typename GeMatrix<MT>::ElementType ElementType;
67
68 const Underscore<IndexType> _;
69
70 //
71 // ==== Estimate optimal workspace. ====
72 //
73 IndexType jw = min(nw, kBot-kTop+1);
74 auto _T = T(_(1,jw),_(1,jw));
75
76 IndexType lWorkOpt;
77 if (jw<=2) {
78 lWorkOpt = 1;
79 } else {
80 //
81 // ==== Workspace query call to DGEHRD ====
82 //
83 IndexType lWork1 = hrd_wsq(IndexType(1), jw-1, _T);
84 //
85 // ==== Workspace query call to DORMHR ====
86 //
87 IndexType lWork2 = ormhr_wsq(Right, NoTrans, IndexType(1), jw-1, _T);
88 //
89 // ==== Workspace query call to DLAQR4 ====
90 //
91 IndexType lWork3 = laqr4_wsq(true, true, IndexType(1), jw, _T);
92 //
93 // ==== Optimal workspace ====
94 //
95 lWorkOpt = max(jw+max(lWork1,lWork2), lWork3);
96 }
97 return lWorkOpt;
98 }
99
100 template <typename IndexType, typename MH, typename MZ, typename VSR,
101 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
102 void
103 laqr3_generic(bool wantT,
104 bool wantZ,
105 IndexType kTop,
106 IndexType kBot,
107 IndexType nw,
108 GeMatrix<MH> &H,
109 IndexType iLoZ,
110 IndexType iHiZ,
111 GeMatrix<MZ> &Z,
112 IndexType &ns,
113 IndexType &nd,
114 DenseVector<VSR> &sr,
115 DenseVector<VSI> &si,
116 GeMatrix<MV> &V,
117 GeMatrix<MT> &T,
118 GeMatrix<MWV> &WV,
119 DenseVector<VWORK> &work)
120 {
121 using std::abs;
122 using std::max;
123 using std::min;
124
125 typedef typename GeMatrix<MH>::ElementType ElementType;
126
127 const Underscore<IndexType> _;
128
129 const ElementType Zero(0), One(1);
130 const IndexType n = H.numRows();
131 const IndexType nh = T.numCols();
132 const IndexType nv = WV.numRows();
133 //
134 // ==== Estimate optimal workspace. ====
135 //
136 IndexType jw = min(nw, kBot-kTop+1);
137 auto T_jw = T(_(1,jw),_(1,jw));
138 auto V_jw = V(_(1,jw),_(1,jw));
139
140 IndexType lWorkOpt;
141 if (jw<=2) {
142 lWorkOpt = 1;
143 } else {
144 //
145 // ==== Workspace query call to DGEHRD ====
146 //
147 IndexType lWork1 = hrd_wsq(IndexType(1), jw-1, T_jw);
148 //
149 // ==== Workspace query call to DORMHR ====
150 //
151 IndexType lWork2 = ormhr_wsq(Right, NoTrans, IndexType(1), jw-1, V_jw);
152 //
153 // ==== Workspace query call to DLAQR4 ====
154 //
155 IndexType lWork3 = laqr4_wsq(true, true, IndexType(1), jw, T_jw);
156 //
157 // ==== Optimal workspace ====
158 //
159 lWorkOpt = max(jw+max(lWork1,lWork2), lWork3);
160 }
161 //
162 // ==== Apply worksize query ====
163 //
164 if (work.length()==0) {
165 work.resize(lWorkOpt);
166 }
167 const IndexType lWork = work.length();
168 //
169 // ==== Nothing to do ...
170 // ... for an empty active block ... ====
171 ns = 0;
172 nd = 0;
173 work(1) = One;
174 if (kTop>kBot) {
175 return;
176 }
177 // ... nor for an empty deflation window. ====
178 if (nw<1) {
179 return;
180 }
181 //
182 // ==== Machine constants ====
183 //
184 ElementType safeMin = lamch<ElementType>(SafeMin);
185 ElementType safeMax = One / safeMin;
186 labad(safeMin, safeMax);
187 const ElementType ulp = lamch<ElementType>(Precision);
188 const ElementType smallNum = safeMin*(ElementType(n)/ulp);
189 //
190 // ==== Setup deflation window ====
191 //
192 IndexType kwTop = kBot - jw + 1;
193 auto sr_kw = sr(_(kwTop,kBot));
194 auto si_kw = si(_(kwTop,kBot));
195 auto H_kw = H(_(kwTop,kBot),_(kwTop,kBot));
196 ElementType s;
197
198 if( kwTop==kTop) {
199 s = Zero;
200 } else {
201 s = H(kwTop, kwTop-1);
202 }
203
204 if (kBot==kwTop) {
205 //
206 // ==== 1-by-1 deflation window: not much to do ====
207 //
208 sr(kwTop) = H(kwTop,kwTop);
209 si(kwTop) = Zero;
210 ns = 1;
211 nd = 0;
212 if (abs(s)<=max(smallNum,ulp*abs(H(kwTop,kwTop)))) {
213 ns = 0;
214 nd = 1;
215 if (kwTop>kTop) {
216 H(kwTop, kwTop-1) = Zero;
217 }
218 }
219 work(1) = One;
220 return;
221 }
222 //
223 // ==== Convert to spike-triangular form. (In case of a
224 // . rare QR failure, this routine continues to do
225 // . aggressive early deflation using that part of
226 // . the deflation window that converged using INFQR
227 // . here and there to keep track.) ====
228 //
229 T_jw.upper() = H_kw.upper();
230 T_jw.diag(-1) = H_kw.diag(-1);
231
232 V_jw = Zero;
233 V_jw.diag(0) = One;
234
235 IndexType nMin = ilaenv<ElementType>(12, "LAQR3", "SV", jw, 1, jw, lWork);
236 IndexType infoQR;
237 if (jw>nMin) {
238 infoQR = laqr4(true, true,
239 IndexType(1), jw, T_jw,
240 sr_kw, si_kw,
241 IndexType(1), jw, V_jw,
242 work);
243 } else {
244 infoQR = lahqr(true, true,
245 IndexType(1), jw, T_jw,
246 sr_kw, si_kw,
247 IndexType(1), jw, V_jw);
248 }
249 //
250 // ==== DTREXC needs a clean margin near the diagonal ====
251 //
252 for (IndexType j=1; j<=jw-3; ++j) {
253 T(j+2, j) = Zero;
254 T(j+3, j) = Zero;
255 }
256 if (jw>2) {
257 T(jw,jw-2) = Zero;
258 }
259 //
260 // ==== Deflation detection loop ====
261 //
262 ns = jw;
263 IndexType iFirst;
264 IndexType iLast = infoQR + 1;
265 while (iLast<=ns) {
266 bool bulge;
267 if (ns==1) {
268 bulge = false;
269 } else {
270 bulge = T(ns,ns-1)!=Zero;
271 }
272 //
273 // ==== Small spike tip test for deflation ====
274 //
275 if (!bulge) {
276 //
277 // ==== Real eigenvalue ====
278 //
279 ElementType foo = abs(T(ns,ns));
280 if (foo==Zero) {
281 foo = abs(s);
282 }
283 if (abs(s*V(1,ns))<=max(smallNum,ulp*foo)) {
284 //
285 // ==== Deflatable ====
286 //
287 ns = ns - 1;
288 } else {
289 //
290 // ==== Undeflatable. Move it up out of the way.
291 // . (DTREXC can not fail in this case.) ====
292 //
293 iFirst = ns;
294 trexc(true, T_jw, V_jw, iFirst, iLast, work(_(1,jw)));
295 ++iLast;
296 }
297 } else {
298 //
299 // ==== Complex conjugate pair ====
300 //
301 ElementType foo = abs(T(ns,ns))
302 + sqrt(abs(T(ns,ns-1)))*sqrt(abs(T(ns-1,ns)));
303 if (foo==Zero) {
304 foo = abs(s);
305 }
306 if (max(abs(s*V(1,ns)), abs(s*V(1,ns-1)))<=max(smallNum,ulp*foo)) {
307 //
308 // ==== Deflatable ====
309 //
310 ns -= 2;
311 } else {
312 //
313 // ==== Undeflatable. Move them up out of the way.
314 // . Fortunately, DTREXC does the right thing with
315 // . ILST in case of a rare exchange failure. ====
316 //
317 iFirst = ns;
318 trexc(true, T_jw, V_jw, iFirst, iLast, work(_(1,jw)));
319 iLast += 2;
320 }
321 }
322 //
323 // ==== End deflation detection loop ====
324 //
325 }
326 //
327 // ==== Return to Hessenberg form ====
328 //
329 if (ns==0) {
330 s = Zero;
331 }
332
333 if (ns<jw) {
334 //
335 // ==== sorting diagonal blocks of T improves accuracy for
336 // . graded matrices. Bubble sort deals well with
337 // . exchange failures. ====
338 //
339 bool sorted = false;
340 IndexType i = ns + 1;
341 while (!sorted) {
342 sorted = true;
343
344 IndexType kEnd = i - 1;
345 i = infoQR + 1;
346
347 IndexType k;
348 if (i==ns) {
349 k = i + 1;
350 } else if (T(i+1,i)==Zero) {
351 k = i + 1;
352 } else {
353 k = i + 2;
354 }
355 while (k<=kEnd) {
356 ElementType evi, evk;
357
358 if (k==i+1) {
359 evi = abs(T(i,i));
360 } else {
361 evi = abs(T(i,i)) + sqrt(abs(T(i+1,i)))*sqrt(abs(T(i,i+1)));
362 }
363
364 if (k==kEnd) {
365 evk = abs(T(k,k));
366 } else if(T(k+1,k)==Zero) {
367 evk = abs(T(k,k));
368 } else {
369 evk = abs(T(k,k)) + sqrt(abs(T(k+1,k)))*sqrt(abs(T(k,k+1)));
370 }
371
372 if (evi>=evk) {
373 i = k;
374 } else {
375 sorted = false;
376 iFirst = i;
377 iLast = k;
378 IndexType info = trexc(true, T_jw, V_jw, iFirst, iLast,
379 work(_(1,jw)));
380 if (info==0) {
381 i = iLast;
382 } else {
383 i = k;
384 }
385 }
386 if (i==kEnd) {
387 k = i + 1;
388 } else if (T(i+1,i)==Zero) {
389 k = i + 1;
390 } else {
391 k = i + 2;
392 }
393 }
394 }
395 }
396 //
397 // ==== Restore shift/eigenvalue array from T ====
398 //
399 IndexType i = jw;
400 while (i>=infoQR+1) {
401 if (i==infoQR+1) {
402 sr(kwTop+i-1) = T(i,i);
403 si(kwTop+i-1) = Zero;
404 i = i - 1;
405 } else if (T(i,i-1)==Zero) {
406 sr(kwTop+i-1 ) = T(i,i);
407 si(kwTop+i-1 ) = Zero;
408 i = i - 1;
409 } else {
410 ElementType aa = T(i-1,i-1);
411 ElementType cc = T(i, i-1);
412 ElementType bb = T(i-1,i );
413 ElementType dd = T(i, i );
414 ElementType cs, sn;
415 lanv2(aa, bb, cc, dd,
416 sr(kwTop+i-2), si(kwTop+i-2),
417 sr(kwTop+i-1), si(kwTop+i-1),
418 cs, sn);
419 i -= 2;
420 }
421 }
422
423 if (ns<jw || s==Zero) {
424 if (ns>1 && s!=Zero) {
425 //
426 // ==== Reflect spike back into lower triangle ====
427 //
428 work(_(1,ns)) = V(1,_(1,ns));
429 ElementType beta = work(1);
430 ElementType tau;
431
432 larfg(ns, beta, work(_(2,ns)), tau);
433 work(1) = One;
434
435 T(_(3,jw),_(1,jw-2)).lower() = Zero;
436
437 const auto _v = work(_(1,ns));
438
439 larf(Left, _v, tau, T(_(1,ns),_(1,jw)), work(_(jw+1,jw+jw)));
440 larf(Right, _v, tau, T(_(1,ns),_(1,ns)), work(_(jw+1,jw+ns)));
441 larf(Right, _v, tau, V(_(1,jw),_(1,ns)), work(_(jw+1,jw+jw)));
442
443 hrd(IndexType(1), ns, T_jw, work(_(1,jw-1)), work(_(jw+1,lWork)));
444 }
445 //
446 // ==== Copy updated reduced window into place ====
447 //
448 if (kwTop>1) {
449 H(kwTop,kwTop-1) = s*V(1,1);
450 }
451 H_kw.upper() = T_jw.upper();
452 H_kw.diag(-1) = T_jw.diag(-1);
453 //
454 // ==== Accumulate orthogonal matrix in order update
455 // . H and Z, if requested. ====
456 //
457 if (ns>1 && s!=Zero) {
458 ormhr(Right, NoTrans,
459 IndexType(1), ns,
460 T(_(1,ns),_(1,ns)),
461 work(_(1,ns-1)),
462 V(_(1,jw),_(1,ns)),
463 work(_(jw+1,lWork)));
464 }
465 //
466 // ==== Update vertical slab in H ====
467 //
468 const IndexType lTop = (wantT) ? 1 : kTop;
469
470 for (IndexType kRow=lTop; kRow<=kwTop-1; kRow+=nv) {
471 const IndexType kLn = min(nv,kwTop-kRow);
472 blas::mm(NoTrans, NoTrans, One,
473 H(_(kRow,kRow+kLn-1),_(kwTop,kBot)),
474 V(_(1,jw),_(1,jw)),
475 Zero,
476 WV(_(1,kLn),_(1,jw)));
477 H(_(kRow,kRow+kLn-1),_(kwTop,kBot)) = WV(_(1,kLn),_(1,jw));
478 }
479 //
480 // ==== Update horizontal slab in H ====
481 //
482 if (wantT) {
483 for (IndexType kCol=kBot+1; kCol<=n; kCol+=nh) {
484 const IndexType kLn = min(nh,n-kCol+1);
485 blas::mm(ConjTrans, NoTrans, One,
486 V(_(1,jw),_(1,jw)),
487 H(_(kwTop,kBot),_(kCol,kCol+kLn-1)),
488 Zero,
489 T(_(1,jw),_(1,kLn)));
490 H(_(kwTop,kBot),_(kCol,kCol+kLn-1)) = T(_(1,jw),_(1,kLn));
491 }
492 }
493 //
494 // ==== Update vertical slab in Z ====
495 //
496 if (wantZ) {
497 for (IndexType kRow=iLoZ; kRow<=iHiZ; kRow+=nv) {
498 const IndexType kLn = min(nv,iHiZ-kRow+1);
499 blas::mm(NoTrans, NoTrans, One,
500 Z(_(kRow,kRow+kLn-1),_(kwTop,kBot)),
501 V(_(1,jw),_(1,jw)),
502 Zero,
503 WV(_(1,kLn),_(1,jw)));
504 Z(_(kRow,kRow+kLn-1), _(kwTop,kBot)) = WV(_(1,kLn),_(1,jw));
505 }
506 }
507 }
508 //
509 // ==== Return the number of deflations ... ====
510 //
511 nd = jw - ns;
512 //
513 // ==== ... and the number of shifts. (Subtracting
514 // . INFQR from the spike length takes care
515 // . of the case of a rare QR failure while
516 // . calculating eigenvalues of the deflation
517 // . window.) ====
518 //
519 ns = ns - infoQR;
520 //
521 // ==== Return optimal workspace. ====
522 //
523 work(1) = lWorkOpt;
524 }
525
526 //== interface for native lapack ===============================================
527
528 #ifdef CHECK_CXXLAPACK
529
530 template <typename IndexType, typename MT>
531 IndexType
532 laqr3_native_wsq(IndexType kTop,
533 IndexType kBot,
534 IndexType nw,
535 const GeMatrix<MT> &T)
536 {
537 typedef typename GeMatrix<MT>::ElementType ElementType;
538
539 const LOGICAL WANTT = false;
540 const LOGICAL WANTZ = false;
541 const INTEGER N = 1;
542 const INTEGER KTOP = kTop;
543 const INTEGER KBOT = kBot;
544 const INTEGER NW = nw;
545 const INTEGER LDH = 1;
546 const INTEGER ILOZ = 0;
547 const INTEGER IHIZ = 0;
548 const INTEGER LDZ = 1;
549 INTEGER NS;
550 INTEGER ND;
551 const INTEGER LDV = nw;
552 const INTEGER NH = T.numCols();
553 const INTEGER LDT = T.leadingDimension();
554 const INTEGER NV = nw;
555 const INTEGER LDWV = nw;
556 ElementType WORK;
557 const INTEGER LWORK = -1;
558 ElementType DUMMY;
559
560 if (IsSame<ElementType,DOUBLE>::value) {
561 LAPACK_IMPL(dlaqr3)(&WANTT,
562 &WANTZ,
563 &N,
564 &KTOP,
565 &KBOT,
566 &NW,
567 &DUMMY,
568 &LDH,
569 &ILOZ,
570 &IHIZ,
571 &DUMMY,
572 &LDZ,
573 &NS,
574 &ND,
575 &DUMMY,
576 &DUMMY,
577 &DUMMY,
578 &LDV,
579 &NH,
580 &DUMMY,
581 &LDT,
582 &NV,
583 &DUMMY,
584 &LDWV,
585 &WORK,
586 &LWORK);
587 } else {
588 ASSERT(0);
589 }
590 return WORK;
591 }
592
593 template <typename IndexType, typename MH, typename MZ, typename VSR,
594 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
595 void
596 laqr3_native(bool wantT,
597 bool wantZ,
598 IndexType kTop,
599 IndexType kBot,
600 IndexType nw,
601 GeMatrix<MH> &H,
602 IndexType iLoZ,
603 IndexType iHiZ,
604 GeMatrix<MZ> &Z,
605 IndexType &ns,
606 IndexType &nd,
607 DenseVector<VSR> &sr,
608 DenseVector<VSI> &si,
609 GeMatrix<MV> &V,
610 GeMatrix<MT> &T,
611 GeMatrix<MWV> &WV,
612 DenseVector<VWORK> &work)
613 {
614 typedef typename GeMatrix<MH>::ElementType ElementType;
615
616 const LOGICAL WANTT = wantT;
617 const LOGICAL WANTZ = wantZ;
618 const INTEGER N = H.numRows();
619 const INTEGER KTOP = kTop;
620 const INTEGER KBOT = kBot;
621 const INTEGER NW = nw;
622 const INTEGER LDH = H.leadingDimension();
623 const INTEGER ILOZ = iLoZ;
624 const INTEGER IHIZ = iHiZ;
625 const INTEGER LDZ = Z.leadingDimension();
626 INTEGER NS = ns;
627 INTEGER ND = nd;
628 const INTEGER LDV = V.leadingDimension();
629 const INTEGER NH = T.numCols();
630 const INTEGER LDT = T.leadingDimension();
631 const INTEGER NV = WV.numRows();
632 const INTEGER LDWV = WV.leadingDimension();
633 const INTEGER LWORK = work.length();
634
635 if (IsSame<ElementType,DOUBLE>::value) {
636 LAPACK_IMPL(dlaqr3)(&WANTT,
637 &WANTZ,
638 &N,
639 &KTOP,
640 &KBOT,
641 &NW,
642 H.data(),
643 &LDH,
644 &ILOZ,
645 &IHIZ,
646 Z.data(),
647 &LDZ,
648 &NS,
649 &ND,
650 sr.data(),
651 si.data(),
652 V.data(),
653 &LDV,
654 &NH,
655 T.data(),
656 &LDT,
657 &NV,
658 WV.data(),
659 &LDWV,
660 work.data(),
661 &LWORK);
662 } else {
663 ASSERT(0);
664 }
665 ns = NS;
666 nd = ND;
667 }
668
669 #endif // CHECK_CXXLAPACK
670
671 //== public interface ==========================================================
672 template <typename IndexType, typename MT>
673 IndexType
674 laqr3_wsq(IndexType kTop,
675 IndexType kBot,
676 IndexType nw,
677 const GeMatrix<MT> &T)
678 {
679 using std::max;
680 //
681 // Test the input parameters
682 //
683 # ifndef NDEBUG
684 ASSERT(T.firstRow()==1);
685 ASSERT(T.firstCol()==1);
686 ASSERT(T.numRows()==T.numCols());
687 # endif
688
689 //
690 // Call implementation
691 //
692 // TODO: call generic implementation
693 IndexType info = laqr3_generic_wsq(kTop, kBot, nw, T);
694
695 # ifdef CHECK_CXXLAPACK
696 //
697 // Compare results
698 //
699 IndexType _info = laqr3_native_wsq(kTop, kBot, nw, T);
700
701 if (info!=_info) {
702 std::cerr << "CXXLAPACK: info = " << info << std::endl;
703 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
704 ASSERT(0);
705 }
706 # endif
707 return info;
708
709 }
710
711 template <typename IndexType, typename MH, typename MZ, typename VSR,
712 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
713 void
714 laqr3(bool wantT,
715 bool wantZ,
716 IndexType kTop,
717 IndexType kBot,
718 IndexType nw,
719 GeMatrix<MH> &H,
720 IndexType iLoZ,
721 IndexType iHiZ,
722 GeMatrix<MZ> &Z,
723 IndexType &ns,
724 IndexType &nd,
725 DenseVector<VSR> &sr,
726 DenseVector<VSI> &si,
727 GeMatrix<MV> &V,
728 GeMatrix<MT> &T,
729 GeMatrix<MWV> &WV,
730 DenseVector<VWORK> &work)
731 {
732 LAPACK_DEBUG_OUT("laqr3");
733
734 using std::max;
735 //
736 // Test the input parameters
737 //
738 # ifndef NDEBUG
739 ASSERT(H.firstRow()==1);
740 ASSERT(H.firstCol()==1);
741 ASSERT(H.numRows()==H.numCols());
742
743 const IndexType n = H.numRows();
744
745 if (wantZ) {
746 ASSERT(1<=iLoZ);
747 ASSERT(iLoZ<=iHiZ);
748 ASSERT(iHiZ<=n);
749
750 ASSERT(Z.firstRow()==1);
751 ASSERT(Z.firstCol()==1);
752 ASSERT(Z.numRows()==n);
753 ASSERT(Z.numCols()==n);
754 }
755
756 ASSERT(sr.length()==kBot);
757 ASSERT(si.length()==kBot);
758
759 ASSERT(V.firstRow()==1);
760 ASSERT(V.firstCol()==1);
761 ASSERT(V.numRows()==nw);
762 ASSERT(V.numCols()==nw);
763
764 const IndexType nh = T.numCols();
765 ASSERT(nh>=nw);
766
767 ASSERT(T.firstRow()==1);
768 ASSERT(T.firstCol()==1);
769
770 const IndexType nv = WV.numRows();
771 ASSERT(nv>=nw);
772
773 ASSERT((work.length()==0) || (work.length()>=n));
774 # endif
775
776 //
777 // Make copies of output arguments
778 //
779 # ifdef CHECK_CXXLAPACK
780 typename GeMatrix<MH>::NoView H_org = H;
781 typename GeMatrix<MZ>::NoView Z_org = Z;
782 IndexType ns_org = ns;
783 IndexType nd_org = nd;
784 typename DenseVector<VSR>::NoView sr_org = sr;
785 typename DenseVector<VSI>::NoView si_org = si;
786 typename GeMatrix<MV>::NoView V_org = V;
787 typename GeMatrix<MT>::NoView T_org = T;
788 typename GeMatrix<MWV>::NoView WV_org = WV;
789 typename DenseVector<VWORK>::NoView work_org = work;
790 # endif
791
792 //
793 // Call implementation
794 //
795 laqr3_generic(wantT, wantZ, kTop, kBot, nw, H,
796 iLoZ, iHiZ, Z, ns, nd, sr, si,
797 V, T, WV, work);
798
799 # ifdef CHECK_CXXLAPACK
800 //
801 // Make copies of results computed by the generic implementation
802 //
803 typename GeMatrix<MH>::NoView H_generic = H;
804 typename GeMatrix<MZ>::NoView Z_generic = Z;
805 IndexType ns_generic = ns;
806 IndexType nd_generic = nd;
807 typename DenseVector<VSR>::NoView sr_generic = sr;
808 typename DenseVector<VSI>::NoView si_generic = si;
809 typename GeMatrix<MV>::NoView V_generic = V;
810 typename GeMatrix<MT>::NoView T_generic = T;
811 typename GeMatrix<MWV>::NoView WV_generic = WV;
812 typename DenseVector<VWORK>::NoView work_generic = work;
813
814 //
815 // restore output arguments
816 //
817 H = H_org;
818 Z = Z_org;
819 ns = ns_org;
820 nd = nd_org;
821 sr = sr_org;
822 si = si_org;
823 V = V_org;
824 T = T_org;
825 WV = WV_org;
826 work = work_org;
827
828 //
829 // Compare generic results with results from the native implementation
830 //
831 laqr3_native(wantT, wantZ, kTop, kBot, nw, H,
832 iLoZ, iHiZ, Z, ns, nd, sr, si,
833 V, T, WV, work);
834
835 bool failed = false;
836 if (! isIdentical(H_generic, H, "H_generic", "H")) {
837 std::cerr << "CXXLAPACK: H_generic = " << H_generic << std::endl;
838 std::cerr << "F77LAPACK: H = " << H << std::endl;
839 std::cerr << "Original: H_org = " << H_org << std::endl;
840 failed = true;
841 }
842
843 if (! isIdentical(Z_generic, Z, "Z_generic", "Z")) {
844 std::cerr << "CXXLAPACK: Z = " << Z_generic << std::endl;
845 std::cerr << "F77LAPACK: Z = " << Z << std::endl;
846 failed = true;
847 }
848
849 if (! isIdentical(ns_generic, ns, "ns_generic", "ns")) {
850 std::cerr << "CXXLAPACK: ns_generic = " << ns_generic << std::endl;
851 std::cerr << "F77LAPACK: ns = " << ns << std::endl;
852 failed = true;
853 }
854
855 if (! isIdentical(nd_generic, nd, "nd_generic", "nd")) {
856 std::cerr << "CXXLAPACK: nd_generic = " << nd_generic << std::endl;
857 std::cerr << "F77LAPACK: nd = " << nd << std::endl;
858 failed = true;
859 }
860
861 if (! isIdentical(sr_generic, sr, "sr_generic", "sr")) {
862 std::cerr << "CXXLAPACK: sr_generic = " << sr_generic << std::endl;
863 std::cerr << "F77LAPACK: sr = " << sr << std::endl;
864 failed = true;
865 }
866
867 if (! isIdentical(si_generic, si, "si_generic", "si")) {
868 std::cerr << "CXXLAPACK: si_generic = " << si_generic << std::endl;
869 std::cerr << "F77LAPACK: si = " << si << std::endl;
870 failed = true;
871 }
872
873 if (! isIdentical(V_generic, V, " V_generic", "V")) {
874 std::cerr << "CXXLAPACK: V_generic = " << V_generic << std::endl;
875 std::cerr << "F77LAPACK: V = " << V << std::endl;
876 failed = true;
877 }
878
879 if (! isIdentical(T_generic, T, "T_generic", "T")) {
880 std::cerr << "CXXLAPACK: T_generic = " << T_generic << std::endl;
881 std::cerr << "F77LAPACK: T = " << T << std::endl;
882 failed = true;
883 }
884
885 if (! isIdentical(WV_generic, WV, "WV_generic", "WV")) {
886 std::cerr << "CXXLAPACK: WV_generic = " << WV_generic << std::endl;
887 std::cerr << "F77LAPACK: WV = " << WV << std::endl;
888 failed = true;
889 }
890
891 if (! isIdentical(work_generic, work, "work_generic", "work")) {
892 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
893 std::cerr << "F77LAPACK: work = " << work << std::endl;
894 failed = true;
895 }
896
897 if (failed) {
898 std::cerr << "error in: laqr3.tcc" << std::endl;
899 ASSERT(0);
900 } else {
901 // std::cerr << "passed: laqr3.tcc" << std::endl;
902 }
903 # endif
904 }
905
906 //-- forwarding ----------------------------------------------------------------
907 template <typename IndexType, typename MT>
908 IndexType
909 laqr3_wsq(IndexType kTop,
910 IndexType kBot,
911 IndexType nw,
912 const MT &&T)
913 {
914 CHECKPOINT_ENTER;
915 const IndexType info = laqr3_wsq(kTop, kBot, nw, T);
916 CHECKPOINT_LEAVE;
917
918 return info;
919 }
920
921 template <typename IndexType, typename MH, typename MZ, typename VSR,
922 typename VSI, typename MV, typename MT, typename MWV, typename VWORK>
923 void
924 laqr3(bool wantT,
925 bool wantZ,
926 IndexType kTop,
927 IndexType kBot,
928 IndexType nw,
929 MH &&H,
930 IndexType iLoZ,
931 IndexType iHiZ,
932 MZ &&Z,
933 IndexType &ns,
934 IndexType &nd,
935 VSR &&sr,
936 VSI &&si,
937 MV &&V,
938 MT &&T,
939 MWV &&WV,
940 VWORK &&work)
941 {
942 CHECKPOINT_ENTER;
943 laqr3(wantT, wantZ, kTop, kBot, nw, H, iLoZ, iHiZ, Z, ns, nd, sr, si,
944 V, T, WV, work);
945 CHECKPOINT_LEAVE;
946 }
947
948 } } // namespace lapack, flens
949
950 #endif // FLENS_LAPACK_EIG_LAQR3_TCC