1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR,
36 $ LDVR, MM, M, WORK, INFO )
37 *
38 * -- LAPACK routine (version 3.3.1) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * -- April 2011 --
42 */
43
44 #ifndef FLENS_LAPACK_EIG_TREVC_TCC
45 #define FLENS_LAPACK_EIG_TREVC_TCC 1
46
47 #include <flens/aux/aux.h>
48 #include <flens/blas/blas.h>
49 #include <flens/lapack/lapack.h>
50
51 namespace flens { namespace lapack {
52
53 //== generic lapack implementation =============================================
54
55 template <typename VSELECT, typename MT, typename MVL, typename MVR,
56 typename IndexType, typename VWORK>
57 void
58 trevc_generic(bool computeVL,
59 bool computeVR,
60 TREVC::Job howMany,
61 DenseVector<VSELECT> &select,
62 const GeMatrix<MT> &T,
63 GeMatrix<MVL> &VL,
64 GeMatrix<MVR> &VR,
65 IndexType mm,
66 IndexType &m,
67 DenseVector<VWORK> &work)
68 {
69
70 using std::abs;
71 using flens::max;
72
73 typedef typename GeMatrix<MT>::ElementType ElementType;
74
75 // TODO: define a colmajor GeMatrixView
76 typedef typename GeMatrix<MT>::View GeMatrixView;
77
78 const ElementType Zero(0), One(1);
79
80 const Underscore<IndexType> _;
81 const IndexType n = T.numCols();
82
83 //
84 // .. Local Arrays ..
85 //
86 ElementType _xData[4];
87 GeMatrixView X = typename GeMatrixView::Engine(2, 2, _xData, 2);
88
89 GeMatrixView Work(n, 3, work, n);
90
91 //
92 // Decode and test the input parameters
93 //
94 const bool over = howMany==TREVC::Backtransform;
95 const bool someV = howMany==TREVC::Selected;
96
97 //
98 // Set M to the number of columns required to store the selected
99 // eigenvectors, standardize the array SELECT if necessary, and
100 // test MM.
101 //
102 if (someV) {
103 m = 0;
104 bool pair = false;
105 for (IndexType j=1; j<=n; ++j) {
106 if (pair) {
107 pair = false;
108 select(j) = false;
109 } else {
110 if (j<n) {
111 if (T(j+1,j)==Zero) {
112 if (select(j)) {
113 ++m;
114 }
115 } else {
116 pair = true;
117 if (select(j) || select(j+1)) {
118 select(j) = true;
119 m += 2;
120 }
121 }
122 } else {
123 if (select(n)) {
124 ++m;
125 }
126 }
127 }
128 }
129 } else {
130 m = n;
131 }
132
133 if (mm<m) {
134 ASSERT(0);
135 }
136 //
137 // Quick return if possible.
138 //
139 if (n==0) {
140 return;
141 }
142 //
143 // Set the constants to control overflow.
144 //
145 ElementType underflow = lamch<ElementType>(SafeMin);
146 ElementType overflow = One / underflow;
147 labad(underflow, overflow);
148
149 const ElementType ulp = lamch<ElementType>(Precision);
150 const ElementType smallNum = underflow*(n/ulp);
151 const ElementType bigNum = (One-ulp)/smallNum;
152 //
153 // Compute 1-norm of each column of strictly upper triangular
154 // part of T to control overflow in triangular solver.
155 //
156 work(1) = Zero;
157 for (IndexType j=2; j<=n; ++j) {
158 work(j) = Zero;
159 for (IndexType i=1; i<=j-1; ++i) {
160 work(j) += abs(T(i,j));
161 }
162 }
163 //
164 // Index IP is used to specify the real or complex eigenvalue:
165 // IP = 0, real eigenvalue,
166 // 1, first of conjugate complex pair: (wr,wi)
167 // -1, second of conjugate complex pair: (wr,wi)
168 //
169 const IndexType n2 = 2*n;
170
171 ElementType scale, xNorm, wr, wi;
172
173 if (computeVR) {
174 //
175 // Compute right eigenvectors.
176 //
177
178 IndexType ip = 0;
179 IndexType is = m;
180 for (IndexType ki=n; ki>=1; --ki) {
181
182 if (ip==1) {
183 ip = 0;
184 continue;
185 }
186 if (ki!=1 && T(ki,ki-1)!=Zero) {
187 ip = -1;
188 }
189
190 if (someV) {
191 if (ip==0) {
192 if (!select(ki)) {
193 if (ip==1) {
194 ip = 0;
195 }
196 if (ip==-1) {
197 ip = 1;
198 }
199 continue;
200 }
201 } else {
202 if (!select(ki-1)) {
203 if (ip==1) {
204 ip = 0;
205 }
206 if (ip==-1) {
207 ip = 1;
208 }
209 continue;
210 }
211 }
212 }
213 //
214 // Compute the KI-th eigenvalue (WR,WI).
215 //
216 wr = T(ki,ki);
217 wi = Zero;
218 if (ip!=0) {
219 wi = sqrt(abs(T(ki,ki-1)))*sqrt(abs(T(ki-1,ki)));
220 }
221 const ElementType safeMin = max(ulp*(abs(wr)+abs(wi)), smallNum);
222
223 if (ip==0) {
224 //
225 // Real right eigenvector
226 //
227 work(ki+n) = One;
228 //
229 // Form right-hand side
230 // TODO: do this by work(_(...)) = -T(_(...),ki)
231 //
232 for (IndexType k=1; k<=ki-1; ++k) {
233 work(k+n) = -T(k,ki);
234 }
235 //
236 // Solve the upper quasi-triangular system:
237 // (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK.
238 //
239 IndexType jNext = ki - 1;
240 for (IndexType j=ki-1; j>=1; --j) {
241 if (j>jNext) {
242 continue;
243 }
244 IndexType j1 = j;
245 IndexType j2 = j;
246 jNext = j - 1;
247 if (j>1) {
248 if (T(j,j-1)!=Zero) {
249 j1 = j - 1;
250 jNext = j - 2;
251 }
252 }
253
254 if (j1==j2) {
255 //
256 // 1-by-1 diagonal block
257 //
258 laln2(false,
259 IndexType(1),
260 safeMin,
261 One,
262 T(_(j,j),_(j,j)),
263 One,
264 One,
265 Work(_(j,j), _(2,2)),
266 wr,
267 Zero,
268 X(_(1,1),_(1,1)),
269 scale,
270 xNorm);
271 //
272 // Scale X(1,1) to avoid overflow when updating
273 // the right-hand side.
274 //
275 if (xNorm>One) {
276 if (work(j)>bigNum/xNorm) {
277 X(1,1) /= xNorm;
278 scale /= xNorm;
279 }
280 }
281 //
282 // Scale if necessary
283 //
284 if (scale!=One) {
285 work(_(1+n,ki+n)) *= scale;
286 }
287 work(j+n) = X(1,1);
288 //
289 // Update right-hand side
290 //
291 work(_(1+n,j-1+n)) -= X(1,1)*T(_(1,j-1),j);
292
293 } else {
294 //
295 // 2-by-2 diagonal block
296 //
297 laln2(false,
298 IndexType(1),
299 safeMin,
300 One,
301 T(_(j-1,j),_(j-1,j)),
302 One,
303 One,
304 Work(_(j-1,j), _(2,2)),
305 wr,
306 Zero,
307 X(_(1,2),_(1,1)),
308 scale,
309 xNorm);
310
311 //
312 // Scale X(1,1) and X(2,1) to avoid overflow when
313 // updating the right-hand side.
314 //
315 if (xNorm>One) {
316 const ElementType beta = max(work(j-1), work(j));
317 if (beta>bigNum/xNorm) {
318 X(1,1) /= xNorm;
319 X(2,1) /= xNorm;
320 scale /= xNorm;
321 }
322 }
323 //
324 // Scale if necessary
325 //
326 if (scale!=One) {
327 work(_(1+n,ki+n)) *= scale;
328 }
329 work(j-1+n) = X(1,1);
330 work(j+n) = X(2,1);
331 //
332 // Update right-hand side
333 //
334 work(_(1+n,j-2+n)) -= X(1,1)*T(_(1,j-2),j-1);
335 work(_(1+n,j-2+n)) -= X(2,1)*T(_(1,j-2),j);
336 }
337 }
338 //
339 // Copy the vector x or Q*x to VR and normalize.
340 //
341 if (!over) {
342 VR(_(1,ki),is) = work(_(1+n,ki+n));
343
344 const IndexType ii = blas::iamax(VR(_(1,ki),is));
345 const ElementType reMax = One / abs(VR(ii,is));
346 VR(_(1,ki),is) *= reMax;
347
348 VR(_(ki+1,n),is) = Zero;
349 } else {
350 if (ki>1) {
351 blas::mv(NoTrans, One,
352 VR(_,_(1,ki-1)),
353 work(_(1+n,n+ki-1)),
354 work(ki+n),
355 VR(_,ki));
356 }
357 const IndexType ii = blas::iamax(VR(_,ki));
358 const ElementType reMax = One / abs(VR(ii,ki));
359 VR(_,ki) *= reMax;
360 }
361
362 } else {
363 //
364 // Complex right eigenvector.
365 //
366 // Initial solve
367 // [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0.
368 // [ (T(KI,KI-1) T(KI,KI) ) ]
369 //
370 if (abs(T(ki-1,ki))>=abs(T(ki,ki-1))) {
371 work(ki-1+n) = One;
372 work(ki+n2) = wi / T(ki-1,ki);
373 } else {
374 work(ki-1+n ) = -wi / T(ki,ki-1);
375 work(ki+n2 ) = One;
376 }
377 work(ki+n) = Zero;
378 work(ki-1+n2) = Zero;
379 //
380 // Form right-hand side
381 //
382 for (IndexType k=1; k<=ki-2; ++k) {
383 work(k+n) = -work(ki-1+n) * T(k,ki-1);
384 work(k+n2) = -work(ki+n2) * T(k,ki);
385 }
386 //
387 // Solve upper quasi-triangular system:
388 // (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2)
389 //
390 IndexType jNext = ki - 2;
391 for (IndexType j=ki-2; j>=1; --j) {
392 if (j>jNext) {
393 continue;
394 }
395 IndexType j1 = j;
396 IndexType j2 = j;
397 jNext = j - 1;
398 if (j>1) {
399 if (T(j,j-1)!=Zero) {
400 j1 = j - 1;
401 jNext = j - 2;
402 }
403 }
404
405 if (j1==j2) {
406 //
407 // 1-by-1 diagonal block
408 //
409 laln2(false,
410 IndexType(2),
411 safeMin,
412 One,
413 T(_(j,j),_(j,j)),
414 One,
415 One,
416 Work(_(j,j), _(2,3)),
417 wr,
418 wi,
419 X(_(1,1),_(1,2)),
420 scale,
421 xNorm);
422 //
423 // Scale X(1,1) and X(1,2) to avoid overflow when
424 // updating the right-hand side.
425 //
426 if (xNorm>One) {
427 if (work(j)>bigNum/xNorm) {
428 X(1,1) /= xNorm;
429 X(1,2) /= xNorm;
430 scale /= xNorm;
431 }
432 }
433 //
434 // Scale if necessary
435 //
436 if (scale!=One) {
437 work(_(1+n,ki+n)) *= scale;
438 work(_(1+n2,ki+n2)) *= scale;
439 }
440 work(j+n) = X(1,1);
441 work(j+n2) = X(1,2);
442 //
443 // Update the right-hand side
444 //
445 work(_(1+n,j-1+n)) -= X(1,1)*T(_(1,j-1),j);
446 work(_(1+n2,j-1+n2)) -= X(1,2)*T(_(1,j-1),j);
447
448 } else {
449 //
450 // 2-by-2 diagonal block
451 //
452 laln2(false,
453 IndexType(2),
454 safeMin,
455 One,
456 T(_(j-1,j),_(j-1,j)),
457 One,
458 One,
459 Work(_(j-1,j), _(2,3)),
460 wr,
461 wi,
462 X(_(1,2),_(1,2)),
463 scale,
464 xNorm);
465
466 //
467 // Scale X to avoid overflow when updating
468 // the right-hand side.
469 //
470 if (xNorm>One) {
471 const ElementType beta = max(work(j-1), work(j));
472 if (beta>bigNum/xNorm) {
473 const ElementType rec = One / xNorm;
474 X(1,1) *= rec;
475 X(1,2) *= rec;
476 X(2,1) *= rec;
477 X(2,2) *= rec;
478 scale *= rec;
479 }
480 }
481 //
482 // Scale if necessary
483 //
484 if (scale!=One) {
485 work(_(1+n,ki+n)) *= scale;
486 work(_(1+n2,ki+n2)) *= scale;
487 }
488 work(j-1+n) = X(1,1);
489 work(j+n) = X(2,1);
490 work(j-1+n2) = X(1,2);
491 work(j+n2) = X(2,2);
492 //
493 // Update the right-hand side
494 //
495 work(_(1+n,j-2+n)) -= X(1,1)*T(_(1,j-2),j-1);
496 work(_(1+n,j-2+n)) -= X(2,1)*T(_(1,j-2),j);
497
498 work(_(1+n2,j-2+n2)) -= X(1,2)*T(_(1,j-2),j-1);
499 work(_(1+n2,j-2+n2)) -= X(2,2)*T(_(1,j-2),j);
500 }
501 }
502 //
503 // Copy the vector x or Q*x to VR and normalize.
504 //
505 if (!over) {
506 VR(_(1,ki),is-1) = work(_(1+n,ki+n));
507 VR(_(1,ki),is) = work(_(1+n2,ki+n2));
508
509 ElementType eMax = Zero;
510 for (IndexType k=1; k<=ki; ++k) {
511 eMax = max(eMax, abs(VR(k,is-1))+abs(VR(k,is)));
512 }
513
514 const ElementType reMax = One / eMax;
515 VR(_(1,ki),is-1) *= reMax;
516 VR(_(1,ki),is) *= reMax;
517
518 VR(_(ki+1,n),is-1) = Zero;
519 VR(_(ki+1,n),is) = Zero;
520
521 } else {
522
523 if (ki>2) {
524 blas::mv(NoTrans, One,
525 VR(_,_(1,ki-2)),
526 work(_(1+n,ki-2+n)),
527 work(ki-1+n),
528 VR(_,ki-1));
529 blas::mv(NoTrans, One,
530 VR(_,_(1,ki-2)),
531 work(_(n2+1,n2+ki-2)),
532 work(ki+n2),
533 VR(_,ki));
534 } else {
535 VR(_,ki-1) *= work(ki-1+n);
536 VR(_,ki) *= work(ki+n2);
537 }
538
539 ElementType eMax = Zero;
540 for (IndexType k=1; k<=n; ++k) {
541 eMax = max(eMax, abs(VR(k,ki-1)) + abs(VR(k,ki)));
542 }
543 const ElementType reMax = One / eMax;
544 VR(_,ki-1) *= reMax;
545 VR(_,ki) *= reMax;
546 }
547 }
548
549 --is;
550 if (ip!=0) {
551 --is;
552 }
553 if (ip==1) {
554 ip = 0;
555 }
556 if (ip==-1) {
557 ip = 1;
558 }
559 }
560 }
561
562 if (computeVL) {
563 //
564 // Compute left eigenvectors.
565 //
566 IndexType ip = 0;
567 IndexType is = 1;
568 for (IndexType ki=1; ki<=n; ++ki) {
569
570 if (ip==-1) {
571 ip = 0;
572 continue;
573 }
574 if (ki!=n && T(ki+1,ki)!=Zero) {
575 ip = 1;
576 }
577
578 if (someV) {
579 if (!select(ki)) {
580 if (ip==-1) {
581 ip = 0;
582 }
583 if (ip==1) {
584 ip = -1;
585 }
586 continue;
587 }
588 }
589 //
590 // Compute the KI-th eigenvalue (WR,WI).
591 //
592 ElementType wr = T(ki,ki);
593 ElementType wi = Zero;
594 if (ip!=0) {
595 wi = sqrt(abs(T(ki,ki+1))) * sqrt(abs(T(ki+1,ki)));
596 }
597 const ElementType safeMin = max(ulp*(abs(wr)+abs(wi)), smallNum);
598
599 if (ip==0) {
600 //
601 // Real left eigenvector.
602 //
603 work(ki+n) = One;
604 //
605 // Form right-hand side
606 //
607 for (IndexType k=ki+1; k<=n; ++k) {
608 work(k+n) = -T(ki,k);
609 }
610 //
611 // Solve the quasi-triangular system:
612 // (T(KI+1:N,KI+1:N) - WR)**T*X = SCALE*WORK
613 //
614 ElementType vMax = One;
615 ElementType vCrit = bigNum;
616
617 IndexType jNext = ki + 1;
618 for (IndexType j=ki+1; j<=n; ++j) {
619 if (j<jNext) {
620 continue;
621 }
622 IndexType j1 = j;
623 IndexType j2 = j;
624 jNext = j + 1;
625 if (j<n) {
626 if (T(j+1,j)!=Zero) {
627 j2 = j + 1;
628 jNext = j + 2;
629 }
630 }
631
632 if (j1==j2) {
633 //
634 // 1-by-1 diagonal block
635 //
636 // Scale if necessary to avoid overflow when forming
637 // the right-hand side.
638 //
639 if (work(j)>vCrit) {
640 const ElementType rec = One / vMax;
641 work(_(ki+n,n2)) *= rec;
642 vMax = One;
643 vCrit = bigNum;
644 }
645
646 work(j+n) -= T(_(ki+1,j-1),j)*work(_(ki+1+n,n+j-1));
647 //
648 // Solve (T(J,J)-WR)**T*X = WORK
649 //
650 laln2(false,
651 IndexType(1),
652 safeMin,
653 One,
654 T(_(j,j),_(j,j)),
655 One,
656 One,
657 Work(_(j,j), _(2,2)),
658 wr,
659 Zero,
660 X(_(1,1),_(1,1)),
661 scale,
662 xNorm);
663
664 //
665 // Scale if necessary
666 //
667 if (scale!=One) {
668 work(_(ki+n,n2)) *= scale;
669 }
670 work(j+n) = X(1,1);
671 vMax = max(abs(work(j+n)), vMax);
672 vCrit = bigNum / vMax;
673
674 } else {
675 //
676 // 2-by-2 diagonal block
677 //
678 // Scale if necessary to avoid overflow when forming
679 // the right-hand side.
680 //
681 const ElementType beta = max(work(j), work(j+1));
682 if (beta>vCrit) {
683 const ElementType rec = One / vMax;
684 work(_(ki+n,n2)) *= rec;
685 vMax = One;
686 vCrit = bigNum;
687 }
688
689 work(j+n) -= T(_(ki+1,j-1),j)*work(_(ki+1+n,n+j-1));
690 work(j+1+n) -= T(_(ki+1,j-1),j+1)*work(_(ki+1+n,n+j-1));
691 //
692 // Solve
693 // [T(J,J)-WR T(J,J+1) ]**T * X = SCALE*( WORK1 )
694 // [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 )
695 //
696 laln2(true,
697 IndexType(1),
698 safeMin,
699 One,
700 T(_(j,j+1),_(j,j+1)),
701 One,
702 One,
703 Work(_(j,j+1), _(2,2)),
704 wr,
705 Zero,
706 X(_(1,2),_(1,1)),
707 scale,
708 xNorm);
709
710 //
711 // Scale if necessary
712 //
713 if (scale!=One) {
714 work(_(ki+n,n2)) *= scale;
715 }
716 work(j+n) = X(1,1);
717 work(j+1+n) = X(2,1);
718
719 vMax = max(abs(work(j+n)), abs(work(j+1+n)), vMax);
720 vCrit = bigNum / vMax;
721 }
722 }
723 //
724 // Copy the vector x or Q*x to VL and normalize.
725 //
726 if (!over) {
727 VL(_(ki,n),is) = work(_(ki+n,n2));
728
729 const IndexType ii = blas::iamax(VL(_(ki,n),is)) + ki - 1;
730 const ElementType reMax = One / abs(VL(ii,is));
731 VL(_(ki,n),is) *= reMax;
732
733 VL(_(1,ki-1),is) = Zero;
734
735 } else {
736
737 if (ki<n) {
738 blas::mv(NoTrans, One,
739 VL(_,_(ki+1,n)),
740 work(_(ki+1+n,n2)),
741 work(ki+n),
742 VL(_,ki));
743 }
744
745 const IndexType ii = blas::iamax(VL(_,ki));
746 const ElementType reMax = One / abs(VL(ii,ki));
747 VL(_,ki) *= reMax;
748
749 }
750
751 } else {
752 //
753 // Complex left eigenvector.
754 //
755 // Initial solve:
756 // ((T(KI,KI) T(KI,KI+1) )**T - (WR - I* WI))*X = 0.
757 // ((T(KI+1,KI) T(KI+1,KI+1)) )
758 //
759 if (abs(T(ki,ki+1))>=abs(T(ki+1,ki))) {
760 work(ki+n) = wi / T(ki,ki+1);
761 work(ki+1+n2) = One;
762 } else {
763 work(ki+n) = One;
764 work(ki+1+n2) = -wi / T(ki+1,ki);
765 }
766 work(ki+1+n) = Zero;
767 work(ki+n2) = Zero;
768 //
769 // Form right-hand side
770 //
771 for (IndexType k=ki+2; k<=n; ++k) {
772 work(k+n) = -work(ki+n)*T(ki,k);
773 work(k+n2) = -work(ki+1+n2)*T(ki+1,k);
774 }
775 //
776 // Solve complex quasi-triangular system:
777 // ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2
778 //
779 ElementType vMax = One;
780 ElementType vCrit = bigNum;
781
782 IndexType jNext = ki + 2;
783 for (IndexType j=ki+2; j<=n; ++j) {
784 if (j<jNext) {
785 continue;
786 }
787 IndexType j1 = j;
788 IndexType j2 = j;
789 jNext = j + 1;
790 if (j<n) {
791 if (T(j+1,j)!=Zero) {
792 j2 = j + 1;
793 jNext = j + 2;
794 }
795 }
796
797 if (j1==j2) {
798 //
799 // 1-by-1 diagonal block
800 //
801 // Scale if necessary to avoid overflow when
802 // forming the right-hand side elements.
803 //
804 if (work(j)>vCrit) {
805 const ElementType rec = One / vMax;
806 work(_(ki+n,n2)) *= rec;
807 work(_(ki+n2,n2+n)) *= rec;
808 vMax = One;
809 vCrit = bigNum;
810 }
811
812 work(j+n) -= T(_(ki+2,j-1),j)*work(_(ki+2+n,n+j-1));
813 work(j+n2) -= T(_(ki+2,j-1),j)*work(_(ki+2+n2,n2+j-1));
814 //
815 // Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2
816 //
817 laln2(true,
818 IndexType(2),
819 safeMin,
820 One,
821 T(_(j,j),_(j,j)),
822 One,
823 One,
824 Work(_(j,j), _(2,3)),
825 wr,
826 -wi,
827 X(_(1,1),_(1,2)),
828 scale,
829 xNorm);
830
831 //
832 // Scale if necessary
833 //
834 if (scale!=One) {
835 work(_(ki+n,n2)) *= scale;
836 work(_(ki+n2,n2+n)) *= scale;
837 }
838 work(j+n) = X(1,1);
839 work(j+n2) = X(1,2);
840 vMax = max(abs(work(j+n)), abs(work(j+n2)), vMax);
841 vCrit = bigNum / vMax;
842
843 } else {
844 //
845 // 2-by-2 diagonal block
846 //
847 // Scale if necessary to avoid overflow when forming
848 // the right-hand side elements.
849 //
850 const ElementType beta = max(work(j), work(j+1));
851 if (beta>vCrit) {
852 const ElementType rec = One / vMax;
853 work(_(ki+n,n2)) *= rec;
854 work(_(ki+n2,n2+n)) *= rec;
855 vMax = One;
856 vCrit = bigNum;
857 }
858
859 const auto _work1 = work(_(ki+2+n,n+j-1));
860 const auto _work2 = work(_(ki+2+n2,n2+j-1));
861
862 work(j+n) -= T(_(ki+2,j-1),j) * _work1;
863 work(j+n2) -= T(_(ki+2,j-1),j) * _work2;
864 work(j+1+n) -= T(_(ki+2,j-1),j+1) * _work1;
865 work(j+1+n2)-= T(_(ki+2,j-1),j+1) * _work2;
866 //
867 // Solve 2-by-2 complex linear equation
868 // ([T(j,j) T(j,j+1) ]**T-(wr-i*wi)*I)*X = SCALE*B
869 // ([T(j+1,j) T(j+1,j+1)] )
870 //
871 laln2(true,
872 IndexType(2),
873 safeMin,
874 One,
875 T(_(j,j+1),_(j,j+1)),
876 One,
877 One,
878 Work(_(j,j+1), _(2,3)),
879 wr,
880 -wi,
881 X(_(1,2),_(1,2)),
882 scale,
883 xNorm);
884 //
885 // Scale if necessary
886 //
887 if (scale!=One) {
888 work(_(ki+n,n2)) *= scale;
889 work(_(ki+n2,n2+n)) *= scale;
890 }
891 work(j+n) = X(1,1);
892 work(j+n2) = X(1,2);
893 work(j+1+n) = X(2,1);
894 work(j+1+n2) = X(2,2);
895 vMax = max(abs(X(1,1)), abs(X(1,2)),
896 abs(X(2,1)), abs(X(2,2)),
897 vMax);
898 vCrit = bigNum / vMax;
899
900 }
901 }
902 //
903 // Copy the vector x or Q*x to VL and normalize.
904 //
905 if (!over) {
906 VL(_(ki,n),is) = work(_(ki+n,n2));
907 VL(_(ki,n),is+1) = work(_(ki+n2,n2+n));
908
909 ElementType eMax = Zero;
910 for (IndexType k=ki; k<=n; ++k) {
911 eMax = max(eMax, abs(VL(k,is)) + abs(VL(k,is+1)));
912 }
913 const ElementType reMax = One / eMax;
914 VL(_(ki,n),is) *= reMax;
915 VL(_(ki,n),is+1) *= reMax;
916
917 VL(_(1,ki-1),is) = Zero;
918 VL(_(1,ki-1),is+1) = Zero;
919 } else {
920 if (ki<n-1) {
921 blas::mv(NoTrans, One,
922 VL(_,_(ki+2,n)),
923 work(_(ki+2+n,n2)),
924 work(ki+n),
925 VL(_,ki));
926 blas::mv(NoTrans, One,
927 VL(_,_(ki+2,n)),
928 work(_(ki+2+n2,n2+n)),
929 work(ki+1+n2),
930 VL(_,ki+1));
931 } else {
932 VL(_,ki) *= work(ki+n);
933 VL(_,ki+1) *= work(ki+1+n2);
934 }
935
936 ElementType eMax = Zero;
937 for (IndexType k=1; k<=n; ++k) {
938 eMax = max(eMax, abs(VL(k,ki)) + abs(VL(k,ki+1)));
939 }
940 const ElementType reMax = One / eMax;
941 VL(_,ki) *= reMax;
942 VL(_,ki+1) *= reMax;
943 }
944 }
945
946 ++is;
947 if (ip!=0) {
948 ++is;
949 }
950 if (ip==-1) {
951 ip = 0;
952 }
953 if (ip==1) {
954 ip = -1;
955 }
956 }
957 }
958 }
959
960 //== interface for native lapack ===============================================
961
962 #ifdef CHECK_CXXLAPACK
963
964 template <typename VSELECT, typename MT, typename MVL, typename MVR,
965 typename IndexType, typename VWORK>
966 void
967 trevc_native(bool computeVL,
968 bool computeVR,
969 TREVC::Job howMany,
970 DenseVector<VSELECT> &select,
971 const GeMatrix<MT> &T,
972 GeMatrix<MVL> &VL,
973 GeMatrix<MVR> &VR,
974 IndexType mm,
975 IndexType &m,
976 DenseVector<VWORK> &work)
977 {
978 typedef typename GeMatrix<MT>::ElementType ElementType;
979
980 char SIDE;
981 if (computeVL && computeVR) {
982 SIDE = 'B';
983 } else if (computeVL) {
984 SIDE = 'L';
985 } else if (computeVR) {
986 SIDE = 'R';
987 }
988
989 const char HOWMNY = getF77LapackChar<TREVC::Job>(howMany);
990 LOGICAL *SELECT = 0;
991 const INTEGER N = T.numRows();
992 const INTEGER LDT = T.leadingDimension();
993 const INTEGER LDVL = VL.leadingDimension();
994 const INTEGER LDVR = VR.leadingDimension();
995 const INTEGER MM = mm;
996 INTEGER M = m;
997 INTEGER INFO;
998
999 if (howMany==TREVC::Selected) {
1000 SELECT = new LOGICAL[N];
1001 for (INTEGER i=1; i<=N; ++i) {
1002 SELECT[i] = select(i);
1003 }
1004 }
1005
1006 if (IsSame<ElementType,DOUBLE>::value) {
1007 LAPACK_IMPL(dtrevc)(&SIDE,
1008 &HOWMNY,
1009 SELECT,
1010 &N,
1011 T.data(),
1012 &LDT,
1013 VL.data(),
1014 &LDVL,
1015 VR.data(),
1016 &LDVR,
1017 &MM,
1018 &M,
1019 work.data(),
1020 &INFO);
1021 } else {
1022 ASSERT(0);
1023 }
1024
1025 if (howMany==TREVC::Selected) {
1026 ASSERT(SELECT);
1027 delete [] SELECT;
1028 }
1029
1030 ASSERT(INFO>=0);
1031 }
1032
1033 #endif // CHECK_CXXLAPACK
1034
1035 //== public interface ==========================================================
1036
1037 template <typename VSELECT, typename MT, typename MVL, typename MVR,
1038 typename IndexType, typename VWORK>
1039 void
1040 trevc(bool computeVL,
1041 bool computeVR,
1042 TREVC::Job howMany,
1043 DenseVector<VSELECT> &select,
1044 const GeMatrix<MT> &T,
1045 GeMatrix<MVL> &VL,
1046 GeMatrix<MVR> &VR,
1047 IndexType mm,
1048 IndexType &m,
1049 DenseVector<VWORK> &work)
1050 {
1051 LAPACK_DEBUG_OUT("BEGIN: trevc");
1052
1053 typedef typename GeMatrix<MT>::ElementType ElementType;
1054 //
1055 // Test the input parameters
1056 //
1057 ASSERT(T.firstRow()==1);
1058 ASSERT(T.firstCol()==1);
1059 ASSERT(T.numRows()==T.numCols());
1060 ASSERT(VL.firstRow()==1);
1061 ASSERT(VL.firstCol()==1);
1062 ASSERT(VR.firstRow()==1);
1063 ASSERT(VR.firstCol()==1);
1064 # ifdef CHECK_CXXLAPACK
1065 //
1066 // Make copies of output arguments
1067 //
1068 typename DenseVector<VSELECT>::NoView select_org = select;
1069 typename GeMatrix<MVL>::NoView VL_org = VL;
1070 typename GeMatrix<MVR>::NoView VR_org = VR;
1071 typename DenseVector<VWORK>::NoView work_org = work;
1072 # endif
1073
1074 //
1075 // Call implementation
1076 //
1077 trevc_generic(computeVL, computeVR, howMany, select,
1078 T, VL, VR, mm, m, work);
1079
1080 # ifdef CHECK_CXXLAPACK
1081 //
1082 // Make copies of results computed by the generic implementation
1083 //
1084 typename DenseVector<VSELECT>::NoView select_generic = select;
1085 typename GeMatrix<MVL>::NoView VL_generic = VL;
1086 typename GeMatrix<MVR>::NoView VR_generic = VR;
1087 typename DenseVector<VWORK>::NoView work_generic = work;
1088 //
1089 // restore output arguments
1090 //
1091 select = select_org;
1092 VL = VL_org;
1093 VR = VR_org;
1094 work = work_org;
1095 //
1096 // Compare results
1097 //
1098 trevc_native(computeVL, computeVR, howMany, select,
1099 T, VL, VR, mm, m, work);
1100
1101 bool failed = false;
1102 if (! isIdentical(VL_generic, VL, "VL_generic", "VL")) {
1103 std::cerr << "CXXLAPACK: VL_generic = " << VL_generic << std::endl;
1104 std::cerr << "F77LAPACK: VL = " << VL << std::endl;
1105 failed = true;
1106 }
1107
1108 if (! isIdentical(VR_generic, VR, "VR_generic", "_VR")) {
1109 std::cerr << "CXXLAPACK: VR_generic = " << VR_generic << std::endl;
1110 std::cerr << "F77LAPACK: VR = " << VR << std::endl;
1111 failed = true;
1112 }
1113
1114 if (! isIdentical(work_generic, work, "work_generic", "work")) {
1115 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
1116 std::cerr << "F77LAPACK: work = " << work << std::endl;
1117 failed = true;
1118 }
1119
1120 if (failed) {
1121 ASSERT(0);
1122 }
1123 # endif
1124 LAPACK_DEBUG_OUT("END: trevc");
1125 }
1126
1127 //-- forwarding ----------------------------------------------------------------
1128 template <typename VSELECT, typename MT, typename MVL, typename MVR,
1129 typename IndexType, typename VWORK>
1130 void
1131 trevc(bool computeVL,
1132 bool computeVR,
1133 TREVC::Job howMany,
1134 VSELECT &&select,
1135 const MT &T,
1136 MVL &&VL,
1137 MVR &&VR,
1138 IndexType &&m,
1139 VWORK &&work)
1140 {
1141 CHECKPOINT_ENTER;
1142 trevc(computeVL, computeVR, howMany, select, T, VL, VR, m, work);
1143 CHECKPOINT_LEAVE;
1144 }
1145
1146 } } // namespace lapack, flens
1147
1148 #endif // FLENS_LAPACK_EIG_TREVC_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DTREVC( SIDE, HOWMNY, SELECT, N, T, LDT, VL, LDVL, VR,
36 $ LDVR, MM, M, WORK, INFO )
37 *
38 * -- LAPACK routine (version 3.3.1) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * -- April 2011 --
42 */
43
44 #ifndef FLENS_LAPACK_EIG_TREVC_TCC
45 #define FLENS_LAPACK_EIG_TREVC_TCC 1
46
47 #include <flens/aux/aux.h>
48 #include <flens/blas/blas.h>
49 #include <flens/lapack/lapack.h>
50
51 namespace flens { namespace lapack {
52
53 //== generic lapack implementation =============================================
54
55 template <typename VSELECT, typename MT, typename MVL, typename MVR,
56 typename IndexType, typename VWORK>
57 void
58 trevc_generic(bool computeVL,
59 bool computeVR,
60 TREVC::Job howMany,
61 DenseVector<VSELECT> &select,
62 const GeMatrix<MT> &T,
63 GeMatrix<MVL> &VL,
64 GeMatrix<MVR> &VR,
65 IndexType mm,
66 IndexType &m,
67 DenseVector<VWORK> &work)
68 {
69
70 using std::abs;
71 using flens::max;
72
73 typedef typename GeMatrix<MT>::ElementType ElementType;
74
75 // TODO: define a colmajor GeMatrixView
76 typedef typename GeMatrix<MT>::View GeMatrixView;
77
78 const ElementType Zero(0), One(1);
79
80 const Underscore<IndexType> _;
81 const IndexType n = T.numCols();
82
83 //
84 // .. Local Arrays ..
85 //
86 ElementType _xData[4];
87 GeMatrixView X = typename GeMatrixView::Engine(2, 2, _xData, 2);
88
89 GeMatrixView Work(n, 3, work, n);
90
91 //
92 // Decode and test the input parameters
93 //
94 const bool over = howMany==TREVC::Backtransform;
95 const bool someV = howMany==TREVC::Selected;
96
97 //
98 // Set M to the number of columns required to store the selected
99 // eigenvectors, standardize the array SELECT if necessary, and
100 // test MM.
101 //
102 if (someV) {
103 m = 0;
104 bool pair = false;
105 for (IndexType j=1; j<=n; ++j) {
106 if (pair) {
107 pair = false;
108 select(j) = false;
109 } else {
110 if (j<n) {
111 if (T(j+1,j)==Zero) {
112 if (select(j)) {
113 ++m;
114 }
115 } else {
116 pair = true;
117 if (select(j) || select(j+1)) {
118 select(j) = true;
119 m += 2;
120 }
121 }
122 } else {
123 if (select(n)) {
124 ++m;
125 }
126 }
127 }
128 }
129 } else {
130 m = n;
131 }
132
133 if (mm<m) {
134 ASSERT(0);
135 }
136 //
137 // Quick return if possible.
138 //
139 if (n==0) {
140 return;
141 }
142 //
143 // Set the constants to control overflow.
144 //
145 ElementType underflow = lamch<ElementType>(SafeMin);
146 ElementType overflow = One / underflow;
147 labad(underflow, overflow);
148
149 const ElementType ulp = lamch<ElementType>(Precision);
150 const ElementType smallNum = underflow*(n/ulp);
151 const ElementType bigNum = (One-ulp)/smallNum;
152 //
153 // Compute 1-norm of each column of strictly upper triangular
154 // part of T to control overflow in triangular solver.
155 //
156 work(1) = Zero;
157 for (IndexType j=2; j<=n; ++j) {
158 work(j) = Zero;
159 for (IndexType i=1; i<=j-1; ++i) {
160 work(j) += abs(T(i,j));
161 }
162 }
163 //
164 // Index IP is used to specify the real or complex eigenvalue:
165 // IP = 0, real eigenvalue,
166 // 1, first of conjugate complex pair: (wr,wi)
167 // -1, second of conjugate complex pair: (wr,wi)
168 //
169 const IndexType n2 = 2*n;
170
171 ElementType scale, xNorm, wr, wi;
172
173 if (computeVR) {
174 //
175 // Compute right eigenvectors.
176 //
177
178 IndexType ip = 0;
179 IndexType is = m;
180 for (IndexType ki=n; ki>=1; --ki) {
181
182 if (ip==1) {
183 ip = 0;
184 continue;
185 }
186 if (ki!=1 && T(ki,ki-1)!=Zero) {
187 ip = -1;
188 }
189
190 if (someV) {
191 if (ip==0) {
192 if (!select(ki)) {
193 if (ip==1) {
194 ip = 0;
195 }
196 if (ip==-1) {
197 ip = 1;
198 }
199 continue;
200 }
201 } else {
202 if (!select(ki-1)) {
203 if (ip==1) {
204 ip = 0;
205 }
206 if (ip==-1) {
207 ip = 1;
208 }
209 continue;
210 }
211 }
212 }
213 //
214 // Compute the KI-th eigenvalue (WR,WI).
215 //
216 wr = T(ki,ki);
217 wi = Zero;
218 if (ip!=0) {
219 wi = sqrt(abs(T(ki,ki-1)))*sqrt(abs(T(ki-1,ki)));
220 }
221 const ElementType safeMin = max(ulp*(abs(wr)+abs(wi)), smallNum);
222
223 if (ip==0) {
224 //
225 // Real right eigenvector
226 //
227 work(ki+n) = One;
228 //
229 // Form right-hand side
230 // TODO: do this by work(_(...)) = -T(_(...),ki)
231 //
232 for (IndexType k=1; k<=ki-1; ++k) {
233 work(k+n) = -T(k,ki);
234 }
235 //
236 // Solve the upper quasi-triangular system:
237 // (T(1:KI-1,1:KI-1) - WR)*X = SCALE*WORK.
238 //
239 IndexType jNext = ki - 1;
240 for (IndexType j=ki-1; j>=1; --j) {
241 if (j>jNext) {
242 continue;
243 }
244 IndexType j1 = j;
245 IndexType j2 = j;
246 jNext = j - 1;
247 if (j>1) {
248 if (T(j,j-1)!=Zero) {
249 j1 = j - 1;
250 jNext = j - 2;
251 }
252 }
253
254 if (j1==j2) {
255 //
256 // 1-by-1 diagonal block
257 //
258 laln2(false,
259 IndexType(1),
260 safeMin,
261 One,
262 T(_(j,j),_(j,j)),
263 One,
264 One,
265 Work(_(j,j), _(2,2)),
266 wr,
267 Zero,
268 X(_(1,1),_(1,1)),
269 scale,
270 xNorm);
271 //
272 // Scale X(1,1) to avoid overflow when updating
273 // the right-hand side.
274 //
275 if (xNorm>One) {
276 if (work(j)>bigNum/xNorm) {
277 X(1,1) /= xNorm;
278 scale /= xNorm;
279 }
280 }
281 //
282 // Scale if necessary
283 //
284 if (scale!=One) {
285 work(_(1+n,ki+n)) *= scale;
286 }
287 work(j+n) = X(1,1);
288 //
289 // Update right-hand side
290 //
291 work(_(1+n,j-1+n)) -= X(1,1)*T(_(1,j-1),j);
292
293 } else {
294 //
295 // 2-by-2 diagonal block
296 //
297 laln2(false,
298 IndexType(1),
299 safeMin,
300 One,
301 T(_(j-1,j),_(j-1,j)),
302 One,
303 One,
304 Work(_(j-1,j), _(2,2)),
305 wr,
306 Zero,
307 X(_(1,2),_(1,1)),
308 scale,
309 xNorm);
310
311 //
312 // Scale X(1,1) and X(2,1) to avoid overflow when
313 // updating the right-hand side.
314 //
315 if (xNorm>One) {
316 const ElementType beta = max(work(j-1), work(j));
317 if (beta>bigNum/xNorm) {
318 X(1,1) /= xNorm;
319 X(2,1) /= xNorm;
320 scale /= xNorm;
321 }
322 }
323 //
324 // Scale if necessary
325 //
326 if (scale!=One) {
327 work(_(1+n,ki+n)) *= scale;
328 }
329 work(j-1+n) = X(1,1);
330 work(j+n) = X(2,1);
331 //
332 // Update right-hand side
333 //
334 work(_(1+n,j-2+n)) -= X(1,1)*T(_(1,j-2),j-1);
335 work(_(1+n,j-2+n)) -= X(2,1)*T(_(1,j-2),j);
336 }
337 }
338 //
339 // Copy the vector x or Q*x to VR and normalize.
340 //
341 if (!over) {
342 VR(_(1,ki),is) = work(_(1+n,ki+n));
343
344 const IndexType ii = blas::iamax(VR(_(1,ki),is));
345 const ElementType reMax = One / abs(VR(ii,is));
346 VR(_(1,ki),is) *= reMax;
347
348 VR(_(ki+1,n),is) = Zero;
349 } else {
350 if (ki>1) {
351 blas::mv(NoTrans, One,
352 VR(_,_(1,ki-1)),
353 work(_(1+n,n+ki-1)),
354 work(ki+n),
355 VR(_,ki));
356 }
357 const IndexType ii = blas::iamax(VR(_,ki));
358 const ElementType reMax = One / abs(VR(ii,ki));
359 VR(_,ki) *= reMax;
360 }
361
362 } else {
363 //
364 // Complex right eigenvector.
365 //
366 // Initial solve
367 // [ (T(KI-1,KI-1) T(KI-1,KI) ) - (WR + I* WI)]*X = 0.
368 // [ (T(KI,KI-1) T(KI,KI) ) ]
369 //
370 if (abs(T(ki-1,ki))>=abs(T(ki,ki-1))) {
371 work(ki-1+n) = One;
372 work(ki+n2) = wi / T(ki-1,ki);
373 } else {
374 work(ki-1+n ) = -wi / T(ki,ki-1);
375 work(ki+n2 ) = One;
376 }
377 work(ki+n) = Zero;
378 work(ki-1+n2) = Zero;
379 //
380 // Form right-hand side
381 //
382 for (IndexType k=1; k<=ki-2; ++k) {
383 work(k+n) = -work(ki-1+n) * T(k,ki-1);
384 work(k+n2) = -work(ki+n2) * T(k,ki);
385 }
386 //
387 // Solve upper quasi-triangular system:
388 // (T(1:KI-2,1:KI-2) - (WR+i*WI))*X = SCALE*(WORK+i*WORK2)
389 //
390 IndexType jNext = ki - 2;
391 for (IndexType j=ki-2; j>=1; --j) {
392 if (j>jNext) {
393 continue;
394 }
395 IndexType j1 = j;
396 IndexType j2 = j;
397 jNext = j - 1;
398 if (j>1) {
399 if (T(j,j-1)!=Zero) {
400 j1 = j - 1;
401 jNext = j - 2;
402 }
403 }
404
405 if (j1==j2) {
406 //
407 // 1-by-1 diagonal block
408 //
409 laln2(false,
410 IndexType(2),
411 safeMin,
412 One,
413 T(_(j,j),_(j,j)),
414 One,
415 One,
416 Work(_(j,j), _(2,3)),
417 wr,
418 wi,
419 X(_(1,1),_(1,2)),
420 scale,
421 xNorm);
422 //
423 // Scale X(1,1) and X(1,2) to avoid overflow when
424 // updating the right-hand side.
425 //
426 if (xNorm>One) {
427 if (work(j)>bigNum/xNorm) {
428 X(1,1) /= xNorm;
429 X(1,2) /= xNorm;
430 scale /= xNorm;
431 }
432 }
433 //
434 // Scale if necessary
435 //
436 if (scale!=One) {
437 work(_(1+n,ki+n)) *= scale;
438 work(_(1+n2,ki+n2)) *= scale;
439 }
440 work(j+n) = X(1,1);
441 work(j+n2) = X(1,2);
442 //
443 // Update the right-hand side
444 //
445 work(_(1+n,j-1+n)) -= X(1,1)*T(_(1,j-1),j);
446 work(_(1+n2,j-1+n2)) -= X(1,2)*T(_(1,j-1),j);
447
448 } else {
449 //
450 // 2-by-2 diagonal block
451 //
452 laln2(false,
453 IndexType(2),
454 safeMin,
455 One,
456 T(_(j-1,j),_(j-1,j)),
457 One,
458 One,
459 Work(_(j-1,j), _(2,3)),
460 wr,
461 wi,
462 X(_(1,2),_(1,2)),
463 scale,
464 xNorm);
465
466 //
467 // Scale X to avoid overflow when updating
468 // the right-hand side.
469 //
470 if (xNorm>One) {
471 const ElementType beta = max(work(j-1), work(j));
472 if (beta>bigNum/xNorm) {
473 const ElementType rec = One / xNorm;
474 X(1,1) *= rec;
475 X(1,2) *= rec;
476 X(2,1) *= rec;
477 X(2,2) *= rec;
478 scale *= rec;
479 }
480 }
481 //
482 // Scale if necessary
483 //
484 if (scale!=One) {
485 work(_(1+n,ki+n)) *= scale;
486 work(_(1+n2,ki+n2)) *= scale;
487 }
488 work(j-1+n) = X(1,1);
489 work(j+n) = X(2,1);
490 work(j-1+n2) = X(1,2);
491 work(j+n2) = X(2,2);
492 //
493 // Update the right-hand side
494 //
495 work(_(1+n,j-2+n)) -= X(1,1)*T(_(1,j-2),j-1);
496 work(_(1+n,j-2+n)) -= X(2,1)*T(_(1,j-2),j);
497
498 work(_(1+n2,j-2+n2)) -= X(1,2)*T(_(1,j-2),j-1);
499 work(_(1+n2,j-2+n2)) -= X(2,2)*T(_(1,j-2),j);
500 }
501 }
502 //
503 // Copy the vector x or Q*x to VR and normalize.
504 //
505 if (!over) {
506 VR(_(1,ki),is-1) = work(_(1+n,ki+n));
507 VR(_(1,ki),is) = work(_(1+n2,ki+n2));
508
509 ElementType eMax = Zero;
510 for (IndexType k=1; k<=ki; ++k) {
511 eMax = max(eMax, abs(VR(k,is-1))+abs(VR(k,is)));
512 }
513
514 const ElementType reMax = One / eMax;
515 VR(_(1,ki),is-1) *= reMax;
516 VR(_(1,ki),is) *= reMax;
517
518 VR(_(ki+1,n),is-1) = Zero;
519 VR(_(ki+1,n),is) = Zero;
520
521 } else {
522
523 if (ki>2) {
524 blas::mv(NoTrans, One,
525 VR(_,_(1,ki-2)),
526 work(_(1+n,ki-2+n)),
527 work(ki-1+n),
528 VR(_,ki-1));
529 blas::mv(NoTrans, One,
530 VR(_,_(1,ki-2)),
531 work(_(n2+1,n2+ki-2)),
532 work(ki+n2),
533 VR(_,ki));
534 } else {
535 VR(_,ki-1) *= work(ki-1+n);
536 VR(_,ki) *= work(ki+n2);
537 }
538
539 ElementType eMax = Zero;
540 for (IndexType k=1; k<=n; ++k) {
541 eMax = max(eMax, abs(VR(k,ki-1)) + abs(VR(k,ki)));
542 }
543 const ElementType reMax = One / eMax;
544 VR(_,ki-1) *= reMax;
545 VR(_,ki) *= reMax;
546 }
547 }
548
549 --is;
550 if (ip!=0) {
551 --is;
552 }
553 if (ip==1) {
554 ip = 0;
555 }
556 if (ip==-1) {
557 ip = 1;
558 }
559 }
560 }
561
562 if (computeVL) {
563 //
564 // Compute left eigenvectors.
565 //
566 IndexType ip = 0;
567 IndexType is = 1;
568 for (IndexType ki=1; ki<=n; ++ki) {
569
570 if (ip==-1) {
571 ip = 0;
572 continue;
573 }
574 if (ki!=n && T(ki+1,ki)!=Zero) {
575 ip = 1;
576 }
577
578 if (someV) {
579 if (!select(ki)) {
580 if (ip==-1) {
581 ip = 0;
582 }
583 if (ip==1) {
584 ip = -1;
585 }
586 continue;
587 }
588 }
589 //
590 // Compute the KI-th eigenvalue (WR,WI).
591 //
592 ElementType wr = T(ki,ki);
593 ElementType wi = Zero;
594 if (ip!=0) {
595 wi = sqrt(abs(T(ki,ki+1))) * sqrt(abs(T(ki+1,ki)));
596 }
597 const ElementType safeMin = max(ulp*(abs(wr)+abs(wi)), smallNum);
598
599 if (ip==0) {
600 //
601 // Real left eigenvector.
602 //
603 work(ki+n) = One;
604 //
605 // Form right-hand side
606 //
607 for (IndexType k=ki+1; k<=n; ++k) {
608 work(k+n) = -T(ki,k);
609 }
610 //
611 // Solve the quasi-triangular system:
612 // (T(KI+1:N,KI+1:N) - WR)**T*X = SCALE*WORK
613 //
614 ElementType vMax = One;
615 ElementType vCrit = bigNum;
616
617 IndexType jNext = ki + 1;
618 for (IndexType j=ki+1; j<=n; ++j) {
619 if (j<jNext) {
620 continue;
621 }
622 IndexType j1 = j;
623 IndexType j2 = j;
624 jNext = j + 1;
625 if (j<n) {
626 if (T(j+1,j)!=Zero) {
627 j2 = j + 1;
628 jNext = j + 2;
629 }
630 }
631
632 if (j1==j2) {
633 //
634 // 1-by-1 diagonal block
635 //
636 // Scale if necessary to avoid overflow when forming
637 // the right-hand side.
638 //
639 if (work(j)>vCrit) {
640 const ElementType rec = One / vMax;
641 work(_(ki+n,n2)) *= rec;
642 vMax = One;
643 vCrit = bigNum;
644 }
645
646 work(j+n) -= T(_(ki+1,j-1),j)*work(_(ki+1+n,n+j-1));
647 //
648 // Solve (T(J,J)-WR)**T*X = WORK
649 //
650 laln2(false,
651 IndexType(1),
652 safeMin,
653 One,
654 T(_(j,j),_(j,j)),
655 One,
656 One,
657 Work(_(j,j), _(2,2)),
658 wr,
659 Zero,
660 X(_(1,1),_(1,1)),
661 scale,
662 xNorm);
663
664 //
665 // Scale if necessary
666 //
667 if (scale!=One) {
668 work(_(ki+n,n2)) *= scale;
669 }
670 work(j+n) = X(1,1);
671 vMax = max(abs(work(j+n)), vMax);
672 vCrit = bigNum / vMax;
673
674 } else {
675 //
676 // 2-by-2 diagonal block
677 //
678 // Scale if necessary to avoid overflow when forming
679 // the right-hand side.
680 //
681 const ElementType beta = max(work(j), work(j+1));
682 if (beta>vCrit) {
683 const ElementType rec = One / vMax;
684 work(_(ki+n,n2)) *= rec;
685 vMax = One;
686 vCrit = bigNum;
687 }
688
689 work(j+n) -= T(_(ki+1,j-1),j)*work(_(ki+1+n,n+j-1));
690 work(j+1+n) -= T(_(ki+1,j-1),j+1)*work(_(ki+1+n,n+j-1));
691 //
692 // Solve
693 // [T(J,J)-WR T(J,J+1) ]**T * X = SCALE*( WORK1 )
694 // [T(J+1,J) T(J+1,J+1)-WR] ( WORK2 )
695 //
696 laln2(true,
697 IndexType(1),
698 safeMin,
699 One,
700 T(_(j,j+1),_(j,j+1)),
701 One,
702 One,
703 Work(_(j,j+1), _(2,2)),
704 wr,
705 Zero,
706 X(_(1,2),_(1,1)),
707 scale,
708 xNorm);
709
710 //
711 // Scale if necessary
712 //
713 if (scale!=One) {
714 work(_(ki+n,n2)) *= scale;
715 }
716 work(j+n) = X(1,1);
717 work(j+1+n) = X(2,1);
718
719 vMax = max(abs(work(j+n)), abs(work(j+1+n)), vMax);
720 vCrit = bigNum / vMax;
721 }
722 }
723 //
724 // Copy the vector x or Q*x to VL and normalize.
725 //
726 if (!over) {
727 VL(_(ki,n),is) = work(_(ki+n,n2));
728
729 const IndexType ii = blas::iamax(VL(_(ki,n),is)) + ki - 1;
730 const ElementType reMax = One / abs(VL(ii,is));
731 VL(_(ki,n),is) *= reMax;
732
733 VL(_(1,ki-1),is) = Zero;
734
735 } else {
736
737 if (ki<n) {
738 blas::mv(NoTrans, One,
739 VL(_,_(ki+1,n)),
740 work(_(ki+1+n,n2)),
741 work(ki+n),
742 VL(_,ki));
743 }
744
745 const IndexType ii = blas::iamax(VL(_,ki));
746 const ElementType reMax = One / abs(VL(ii,ki));
747 VL(_,ki) *= reMax;
748
749 }
750
751 } else {
752 //
753 // Complex left eigenvector.
754 //
755 // Initial solve:
756 // ((T(KI,KI) T(KI,KI+1) )**T - (WR - I* WI))*X = 0.
757 // ((T(KI+1,KI) T(KI+1,KI+1)) )
758 //
759 if (abs(T(ki,ki+1))>=abs(T(ki+1,ki))) {
760 work(ki+n) = wi / T(ki,ki+1);
761 work(ki+1+n2) = One;
762 } else {
763 work(ki+n) = One;
764 work(ki+1+n2) = -wi / T(ki+1,ki);
765 }
766 work(ki+1+n) = Zero;
767 work(ki+n2) = Zero;
768 //
769 // Form right-hand side
770 //
771 for (IndexType k=ki+2; k<=n; ++k) {
772 work(k+n) = -work(ki+n)*T(ki,k);
773 work(k+n2) = -work(ki+1+n2)*T(ki+1,k);
774 }
775 //
776 // Solve complex quasi-triangular system:
777 // ( T(KI+2,N:KI+2,N) - (WR-i*WI) )*X = WORK1+i*WORK2
778 //
779 ElementType vMax = One;
780 ElementType vCrit = bigNum;
781
782 IndexType jNext = ki + 2;
783 for (IndexType j=ki+2; j<=n; ++j) {
784 if (j<jNext) {
785 continue;
786 }
787 IndexType j1 = j;
788 IndexType j2 = j;
789 jNext = j + 1;
790 if (j<n) {
791 if (T(j+1,j)!=Zero) {
792 j2 = j + 1;
793 jNext = j + 2;
794 }
795 }
796
797 if (j1==j2) {
798 //
799 // 1-by-1 diagonal block
800 //
801 // Scale if necessary to avoid overflow when
802 // forming the right-hand side elements.
803 //
804 if (work(j)>vCrit) {
805 const ElementType rec = One / vMax;
806 work(_(ki+n,n2)) *= rec;
807 work(_(ki+n2,n2+n)) *= rec;
808 vMax = One;
809 vCrit = bigNum;
810 }
811
812 work(j+n) -= T(_(ki+2,j-1),j)*work(_(ki+2+n,n+j-1));
813 work(j+n2) -= T(_(ki+2,j-1),j)*work(_(ki+2+n2,n2+j-1));
814 //
815 // Solve (T(J,J)-(WR-i*WI))*(X11+i*X12)= WK+I*WK2
816 //
817 laln2(true,
818 IndexType(2),
819 safeMin,
820 One,
821 T(_(j,j),_(j,j)),
822 One,
823 One,
824 Work(_(j,j), _(2,3)),
825 wr,
826 -wi,
827 X(_(1,1),_(1,2)),
828 scale,
829 xNorm);
830
831 //
832 // Scale if necessary
833 //
834 if (scale!=One) {
835 work(_(ki+n,n2)) *= scale;
836 work(_(ki+n2,n2+n)) *= scale;
837 }
838 work(j+n) = X(1,1);
839 work(j+n2) = X(1,2);
840 vMax = max(abs(work(j+n)), abs(work(j+n2)), vMax);
841 vCrit = bigNum / vMax;
842
843 } else {
844 //
845 // 2-by-2 diagonal block
846 //
847 // Scale if necessary to avoid overflow when forming
848 // the right-hand side elements.
849 //
850 const ElementType beta = max(work(j), work(j+1));
851 if (beta>vCrit) {
852 const ElementType rec = One / vMax;
853 work(_(ki+n,n2)) *= rec;
854 work(_(ki+n2,n2+n)) *= rec;
855 vMax = One;
856 vCrit = bigNum;
857 }
858
859 const auto _work1 = work(_(ki+2+n,n+j-1));
860 const auto _work2 = work(_(ki+2+n2,n2+j-1));
861
862 work(j+n) -= T(_(ki+2,j-1),j) * _work1;
863 work(j+n2) -= T(_(ki+2,j-1),j) * _work2;
864 work(j+1+n) -= T(_(ki+2,j-1),j+1) * _work1;
865 work(j+1+n2)-= T(_(ki+2,j-1),j+1) * _work2;
866 //
867 // Solve 2-by-2 complex linear equation
868 // ([T(j,j) T(j,j+1) ]**T-(wr-i*wi)*I)*X = SCALE*B
869 // ([T(j+1,j) T(j+1,j+1)] )
870 //
871 laln2(true,
872 IndexType(2),
873 safeMin,
874 One,
875 T(_(j,j+1),_(j,j+1)),
876 One,
877 One,
878 Work(_(j,j+1), _(2,3)),
879 wr,
880 -wi,
881 X(_(1,2),_(1,2)),
882 scale,
883 xNorm);
884 //
885 // Scale if necessary
886 //
887 if (scale!=One) {
888 work(_(ki+n,n2)) *= scale;
889 work(_(ki+n2,n2+n)) *= scale;
890 }
891 work(j+n) = X(1,1);
892 work(j+n2) = X(1,2);
893 work(j+1+n) = X(2,1);
894 work(j+1+n2) = X(2,2);
895 vMax = max(abs(X(1,1)), abs(X(1,2)),
896 abs(X(2,1)), abs(X(2,2)),
897 vMax);
898 vCrit = bigNum / vMax;
899
900 }
901 }
902 //
903 // Copy the vector x or Q*x to VL and normalize.
904 //
905 if (!over) {
906 VL(_(ki,n),is) = work(_(ki+n,n2));
907 VL(_(ki,n),is+1) = work(_(ki+n2,n2+n));
908
909 ElementType eMax = Zero;
910 for (IndexType k=ki; k<=n; ++k) {
911 eMax = max(eMax, abs(VL(k,is)) + abs(VL(k,is+1)));
912 }
913 const ElementType reMax = One / eMax;
914 VL(_(ki,n),is) *= reMax;
915 VL(_(ki,n),is+1) *= reMax;
916
917 VL(_(1,ki-1),is) = Zero;
918 VL(_(1,ki-1),is+1) = Zero;
919 } else {
920 if (ki<n-1) {
921 blas::mv(NoTrans, One,
922 VL(_,_(ki+2,n)),
923 work(_(ki+2+n,n2)),
924 work(ki+n),
925 VL(_,ki));
926 blas::mv(NoTrans, One,
927 VL(_,_(ki+2,n)),
928 work(_(ki+2+n2,n2+n)),
929 work(ki+1+n2),
930 VL(_,ki+1));
931 } else {
932 VL(_,ki) *= work(ki+n);
933 VL(_,ki+1) *= work(ki+1+n2);
934 }
935
936 ElementType eMax = Zero;
937 for (IndexType k=1; k<=n; ++k) {
938 eMax = max(eMax, abs(VL(k,ki)) + abs(VL(k,ki+1)));
939 }
940 const ElementType reMax = One / eMax;
941 VL(_,ki) *= reMax;
942 VL(_,ki+1) *= reMax;
943 }
944 }
945
946 ++is;
947 if (ip!=0) {
948 ++is;
949 }
950 if (ip==-1) {
951 ip = 0;
952 }
953 if (ip==1) {
954 ip = -1;
955 }
956 }
957 }
958 }
959
960 //== interface for native lapack ===============================================
961
962 #ifdef CHECK_CXXLAPACK
963
964 template <typename VSELECT, typename MT, typename MVL, typename MVR,
965 typename IndexType, typename VWORK>
966 void
967 trevc_native(bool computeVL,
968 bool computeVR,
969 TREVC::Job howMany,
970 DenseVector<VSELECT> &select,
971 const GeMatrix<MT> &T,
972 GeMatrix<MVL> &VL,
973 GeMatrix<MVR> &VR,
974 IndexType mm,
975 IndexType &m,
976 DenseVector<VWORK> &work)
977 {
978 typedef typename GeMatrix<MT>::ElementType ElementType;
979
980 char SIDE;
981 if (computeVL && computeVR) {
982 SIDE = 'B';
983 } else if (computeVL) {
984 SIDE = 'L';
985 } else if (computeVR) {
986 SIDE = 'R';
987 }
988
989 const char HOWMNY = getF77LapackChar<TREVC::Job>(howMany);
990 LOGICAL *SELECT = 0;
991 const INTEGER N = T.numRows();
992 const INTEGER LDT = T.leadingDimension();
993 const INTEGER LDVL = VL.leadingDimension();
994 const INTEGER LDVR = VR.leadingDimension();
995 const INTEGER MM = mm;
996 INTEGER M = m;
997 INTEGER INFO;
998
999 if (howMany==TREVC::Selected) {
1000 SELECT = new LOGICAL[N];
1001 for (INTEGER i=1; i<=N; ++i) {
1002 SELECT[i] = select(i);
1003 }
1004 }
1005
1006 if (IsSame<ElementType,DOUBLE>::value) {
1007 LAPACK_IMPL(dtrevc)(&SIDE,
1008 &HOWMNY,
1009 SELECT,
1010 &N,
1011 T.data(),
1012 &LDT,
1013 VL.data(),
1014 &LDVL,
1015 VR.data(),
1016 &LDVR,
1017 &MM,
1018 &M,
1019 work.data(),
1020 &INFO);
1021 } else {
1022 ASSERT(0);
1023 }
1024
1025 if (howMany==TREVC::Selected) {
1026 ASSERT(SELECT);
1027 delete [] SELECT;
1028 }
1029
1030 ASSERT(INFO>=0);
1031 }
1032
1033 #endif // CHECK_CXXLAPACK
1034
1035 //== public interface ==========================================================
1036
1037 template <typename VSELECT, typename MT, typename MVL, typename MVR,
1038 typename IndexType, typename VWORK>
1039 void
1040 trevc(bool computeVL,
1041 bool computeVR,
1042 TREVC::Job howMany,
1043 DenseVector<VSELECT> &select,
1044 const GeMatrix<MT> &T,
1045 GeMatrix<MVL> &VL,
1046 GeMatrix<MVR> &VR,
1047 IndexType mm,
1048 IndexType &m,
1049 DenseVector<VWORK> &work)
1050 {
1051 LAPACK_DEBUG_OUT("BEGIN: trevc");
1052
1053 typedef typename GeMatrix<MT>::ElementType ElementType;
1054 //
1055 // Test the input parameters
1056 //
1057 ASSERT(T.firstRow()==1);
1058 ASSERT(T.firstCol()==1);
1059 ASSERT(T.numRows()==T.numCols());
1060 ASSERT(VL.firstRow()==1);
1061 ASSERT(VL.firstCol()==1);
1062 ASSERT(VR.firstRow()==1);
1063 ASSERT(VR.firstCol()==1);
1064 # ifdef CHECK_CXXLAPACK
1065 //
1066 // Make copies of output arguments
1067 //
1068 typename DenseVector<VSELECT>::NoView select_org = select;
1069 typename GeMatrix<MVL>::NoView VL_org = VL;
1070 typename GeMatrix<MVR>::NoView VR_org = VR;
1071 typename DenseVector<VWORK>::NoView work_org = work;
1072 # endif
1073
1074 //
1075 // Call implementation
1076 //
1077 trevc_generic(computeVL, computeVR, howMany, select,
1078 T, VL, VR, mm, m, work);
1079
1080 # ifdef CHECK_CXXLAPACK
1081 //
1082 // Make copies of results computed by the generic implementation
1083 //
1084 typename DenseVector<VSELECT>::NoView select_generic = select;
1085 typename GeMatrix<MVL>::NoView VL_generic = VL;
1086 typename GeMatrix<MVR>::NoView VR_generic = VR;
1087 typename DenseVector<VWORK>::NoView work_generic = work;
1088 //
1089 // restore output arguments
1090 //
1091 select = select_org;
1092 VL = VL_org;
1093 VR = VR_org;
1094 work = work_org;
1095 //
1096 // Compare results
1097 //
1098 trevc_native(computeVL, computeVR, howMany, select,
1099 T, VL, VR, mm, m, work);
1100
1101 bool failed = false;
1102 if (! isIdentical(VL_generic, VL, "VL_generic", "VL")) {
1103 std::cerr << "CXXLAPACK: VL_generic = " << VL_generic << std::endl;
1104 std::cerr << "F77LAPACK: VL = " << VL << std::endl;
1105 failed = true;
1106 }
1107
1108 if (! isIdentical(VR_generic, VR, "VR_generic", "_VR")) {
1109 std::cerr << "CXXLAPACK: VR_generic = " << VR_generic << std::endl;
1110 std::cerr << "F77LAPACK: VR = " << VR << std::endl;
1111 failed = true;
1112 }
1113
1114 if (! isIdentical(work_generic, work, "work_generic", "work")) {
1115 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
1116 std::cerr << "F77LAPACK: work = " << work << std::endl;
1117 failed = true;
1118 }
1119
1120 if (failed) {
1121 ASSERT(0);
1122 }
1123 # endif
1124 LAPACK_DEBUG_OUT("END: trevc");
1125 }
1126
1127 //-- forwarding ----------------------------------------------------------------
1128 template <typename VSELECT, typename MT, typename MVL, typename MVR,
1129 typename IndexType, typename VWORK>
1130 void
1131 trevc(bool computeVL,
1132 bool computeVR,
1133 TREVC::Job howMany,
1134 VSELECT &&select,
1135 const MT &T,
1136 MVL &&VL,
1137 MVR &&VR,
1138 IndexType &&m,
1139 VWORK &&work)
1140 {
1141 CHECKPOINT_ENTER;
1142 trevc(computeVL, computeVR, howMany, select, T, VL, VR, m, work);
1143 CHECKPOINT_LEAVE;
1144 }
1145
1146 } } // namespace lapack, flens
1147
1148 #endif // FLENS_LAPACK_EIG_TREVC_TCC