1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 SUBROUTINE DLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI,
35 $ ILOZ, IHIZ, Z, LDZ, INFO )
36 *
37 * -- LAPACK auxiliary routine (version 3.2) --
38 * Univ. of Tennessee, Univ. of California Berkeley,
39 * Univ. of Colorado Denver and NAG Ltd..
40 * November 2006
41 */
42
43 #ifndef FLENS_LAPACK_EIG_LAHQR_TCC
44 #define FLENS_LAPACK_EIG_LAHQR_TCC 1
45
46 #include <cmath>
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 namespace flens { namespace lapack {
51
52 //== generic lapack implementation =============================================
53
54 template <typename IndexType, typename MH, typename VWR, typename VWI,
55 typename MZ>
56 IndexType
57 lahqr_generic(bool wantT,
58 bool wantZ,
59 IndexType iLo,
60 IndexType iHi,
61 GeMatrix<MH> &H,
62 DenseVector<VWR> &wr,
63 DenseVector<VWI> &wi,
64 IndexType iLoZ,
65 IndexType iHiZ,
66 GeMatrix<MZ> &Z)
67 {
68 using std::abs;
69 using std::max;
70 using std::min;
71
72 typedef typename GeMatrix<MH>::ElementType T;
73
74 const Underscore<IndexType> _;
75 const T Zero(0), One(1), Two(2);
76 const T Dat1 = T(3)/T(4),
77 Dat2 = T(-0.4375);
78 const IndexType itMax = 30;
79 const IndexType n = H.numRows();
80
81 typedef typename GeMatrix<MH>::VectorView VectorView;
82 T vBuffer[3];
83 VectorView v = typename VectorView::Engine(3, vBuffer);
84
85 //
86 // Quick return if possible
87 //
88 if (n==0) {
89 return 0;
90 }
91 if (iLo==iHi) {
92 wr(iLo) = H(iLo, iLo);
93 wi(iLo) = Zero;
94 return 0;
95 }
96 //
97 // ==== clear out the trash ====
98 //
99 for (IndexType j=iLo; j<=iHi-3; ++j) {
100 H(j+2, j) = Zero;
101 H(j+3, j) = Zero;
102 }
103 if (iLo<=iHi-2) {
104 H(iHi, iHi-2) = Zero;
105 }
106
107 const IndexType nh = iHi - iLo + 1;
108 //
109 // Set machine-dependent constants for the stopping criterion.
110 //
111 T safeMin = lamch<T>(SafeMin);
112 T safeMax = One / safeMin;
113 labad(safeMin, safeMax);
114 const T ulp = lamch<T>(Precision);
115 const T smallNum = safeMin*(T(nh)/ulp);
116 //
117 // i1 and i2 are the indices of the first row and last column of H
118 // to which transformations must be applied. If eigenvalues only are
119 // being computed, i1 and i2 are set inside the main loop.
120 //
121 IndexType i1 = -1, i2 = -1;
122 if (wantT) {
123 i1 = 1;
124 i2 = n;
125 }
126 //
127 // The main loop begins here. Variable i is the loop index and decreases from
128 // iHi to iLo in steps of 1 or 2. Each iteration of the loop works
129 // with the active submatrix in rows and columns l to i.
130 // Eigenvalues i+1 to iHi have already converged. Either l = iLo or
131 // H(l,l-1) is negligible so that the matrix splits.
132 //
133 IndexType i = iHi;
134 while (true) {
135 IndexType l = iLo;
136 if (i<iLo) {
137 break;
138 }
139 //
140 // Perform QR iterations on rows and columns iLo to i until a
141 // submatrix of order 1 or 2 splits off at the bottom because a
142 // subdiagonal element has become negligible.
143 //
144 IndexType its;
145 for (its=0; its<=itMax; ++its) {
146 //
147 // Look for a single small subdiagonal element.
148 //
149 IndexType k;
150 for (k=i; k>=l+1; --k) {
151 if (abs(H(k,k-1))<=smallNum) {
152 break;
153 }
154 T test = abs(H(k-1,k-1)) + abs(H(k,k));
155 if (test==Zero) {
156 if (k-2>=iLo) {
157 test += abs(H(k-1,k-2));
158 }
159 if (k+1<=iHi) {
160 test += abs(H(k+1,k));
161 }
162 }
163 // ==== The following is a conservative small subdiagonal
164 // . deflation criterion due to Ahues & Tisseur (LAWN 122,
165 // . 1997). It has better mathematical foundation and
166 // . improves accuracy in some cases. ====
167 if (abs(H(k,k-1))<=ulp*test) {
168 const T ab = max(abs(H(k, k-1)), abs(H(k-1, k)));
169 const T ba = min(abs(H(k, k-1)), abs(H(k-1, k)));
170 const T aa = max(abs(H(k, k)), abs(H(k-1, k-1)-H(k, k)));
171 const T bb = min(abs(H(k, k)), abs(H(k-1, k-1)-H(k, k)));
172 const T s = aa + ab;
173 if (ba*(ab/s)<=max(smallNum, ulp*(bb*(aa/s)))) {
174 break;
175 }
176 }
177 }
178 l = k;
179
180 if (l>iLo) {
181 //
182 // H(l,l-1) is negligible
183 //
184 H(l, l-1) = Zero;
185 }
186 //
187 // Exit from loop if a submatrix of order 1 or 2 has split off.
188 //
189 if (l>=i-1) {
190 break;
191 }
192 //
193 // Now the active submatrix is in rows and columns l to i. If
194 // eigenvalues only are being computed, only the active submatrix
195 // need be transformed.
196 //
197 if (!wantT) {
198 i1 = l;
199 i2 = i;
200 }
201
202 T H11, H12, H21, H22;
203 T rt1r, rt2r, rt1i, rt2i;
204 if (its==10) {
205 //
206 // Exceptional shift.
207 //
208 const T s = abs(H(l+1,l)) + abs(H(l+2, l+1));
209 H11 = Dat1*s + H(l,l);
210 H12 = Dat2*s;
211 H21 = s;
212 H22 = H11;
213 } else if (its==20) {
214 //
215 // Exceptional shift.
216 //
217 const T s = abs(H(i,i-1)) + abs(H(i-1,i-2));
218 H11 = Dat1*s + H(i,i);
219 H12 = Dat2*s;
220 H21 = s;
221 H22 = H11;
222 } else {
223 //
224 // Prepare to use Francis' double shift
225 // (i.e. 2nd degree generalized Rayleigh quotient)
226 //
227 H11 = H(i-1, i-1);
228 H21 = H( i, i-1);
229 H12 = H(i-1, i);
230 H22 = H( i, i);
231 }
232
233 const T s = abs(H11) + abs(H12) + abs(H21) + abs(H22);
234 if (s==Zero) {
235 rt1r = Zero;
236 rt1i = Zero;
237 rt2r = Zero;
238 rt2i = Zero;
239 } else {
240 H11 /= s;
241 H21 /= s;
242 H12 /= s;
243 H22 /= s;
244 const T tr = (H11+H22) / Two;
245 const T det = (H11-tr)*(H22-tr) - H12*H21;
246 const T rtDisc = sqrt(abs(det));
247 if (det>=Zero) {
248 //
249 // ==== complex conjugate shifts ====
250 //
251 rt1r = tr*s;
252 rt2r = rt1r;
253 rt1i = rtDisc*s;
254 rt2i = -rt1i;
255 } else {
256 //
257 // ==== real shifts (use only one of them) ====
258 //
259 rt1r = tr + rtDisc;
260 rt2r = tr - rtDisc;
261 if (abs(rt1r-H22)<=abs(rt2r-H22)) {
262 rt1r *= s;
263 rt2r = rt1r;
264 } else {
265 rt2r *= s;
266 rt1r = rt2r;
267 }
268 rt1i = Zero;
269 rt2i = Zero;
270 }
271 }
272 //
273 // Look for two consecutive small subdiagonal elements.
274 //
275 IndexType m;
276 for (m=i-2; m>=l; --m) {
277 // Determine the effect of starting the double-shift QR
278 // iteration at row m, and see if this would make H(m,m-1)
279 // negligible. (The following uses scaling to avoid
280 // overflows and most underflows.)
281 //
282 T H21S = H(m+1,m);
283 T s = abs(H(m,m)-rt2r) + abs(rt2i) + abs(H21S);
284
285 H21S = H(m+1,m)/s;
286 v(1) = H21S*H(m,m+1)
287 + (H(m,m)-rt1r)*((H(m,m)-rt2r)/s)
288 - rt1i*(rt2i/s);
289 v(2) = H21S*(H(m,m) + H(m+1,m+1)-rt1r-rt2r);
290 v(3) = H21S*H(m+2,m+1);
291
292 s = abs(v(1)) + abs(v(2)) + abs(v(3));
293 v(1) /= s;
294 v(2) /= s;
295 v(3) /= s;
296
297 if (m==l) {
298 break;
299 }
300
301 const T value1 = abs(H(m,m-1))*(abs(v(2))+abs(v(3)));
302 const T value2 = ulp*abs(v(1))
303 *(abs(H(m-1,m-1))+abs(H(m,m))+abs(H(m+1,m+1)));
304 if (value1<=value2) {
305 break;
306 }
307 }
308 //
309 // Double-shift QR step
310 //
311 for (k=m; k<=i-1; ++k) {
312 //
313 // The first iteration of this loop determines a reflection G
314 // from the vector v and applies it from left and right to H,
315 // thus creating a nonzero bulge below the subdiagonal.
316 //
317 // Each subsequent iteration determines a reflection G to
318 // restore the Hessenberg form in the (k-1)th column, and thus
319 // chases the bulge one step toward the bottom of the active
320 // submatrix. 'nr' is the order of G.
321 //
322 T t1, t2, t3, v2, v3;
323
324 IndexType nr = min(IndexType(3), i-k+1);
325 if (k>m) {
326 v(_(1,nr)) = H(_(k,k+nr-1),k-1);
327 }
328 larfg(nr, v(1), v(_(2,nr)), t1);
329 if (k>m) {
330 H(k, k-1) = v(1);
331 H(k+1,k-1) = Zero;
332 if (k<i-1) {
333 H(k+2,k-1) = Zero;
334 }
335 } else if (m>l) {
336 // ==== Use the following instead of
337 // . H( K, K-1 ) = -H( K, K-1 ) to
338 // . avoid a bug when v(2) and v(3)
339 // . underflow. ====
340 H(k, k-1) *= (One-t1);
341 }
342 v2 = v(2);
343 t2 = t1*v2;
344 if (nr==3) {
345 v3 = v(3);
346 t3 = t1*v3;
347 //
348 // Apply G from the left to transform the rows of the matrix
349 // in columns k to i2.
350 //
351 for (IndexType j=k; j<=i2; ++j) {
352 const T sum = H(k,j) + v2*H(k+1,j) + v3*H(k+2,j);
353 H(k, j) -= sum*t1;
354 H(k+1, j) -= sum*t2;
355 H(k+2, j) -= sum*t3;
356 }
357 //
358 // Apply G from the right to transform the columns of the
359 // matrix in rows i1 to min(k+3,i).
360 //
361 for (IndexType j=i1; j<=min(k+3,i); ++j) {
362 const T sum = H(j, k) + v2*H(j,k+1) + v3*H(j,k+2);
363 H(j, k ) -= sum*t1;
364 H(j, k+1) -= sum*t2;
365 H(j, k+2) -= sum*t3;
366 }
367
368 if (wantZ) {
369 //
370 // Accumulate transformations in the matrix Z
371 //
372 for (IndexType j=iLoZ; j<=iHiZ; ++j) {
373 const T sum = Z(j, k) + v2*Z(j, k+1) + v3*Z(j, k+2);
374 Z(j, k ) -= sum*t1;
375 Z(j, k+1) -= sum*t2;
376 Z(j, k+2) -= sum*t3;
377 }
378 }
379 } else if (nr==2) {
380 //
381 // Apply G from the left to transform the rows of the matrix
382 // in columns K to I2.
383 //
384 for (IndexType j=k; j<=i2; ++j) {
385 const T sum = H(k, j) + v2*H(k+1, j);
386 H(k, j) -= sum*t1;
387 H(k+1, j) -= sum*t2;
388 }
389 //
390 // Apply G from the right to transform the columns of the
391 // matrix in rows i1 to min(k+3,i).
392 //
393 for (IndexType j=i1; j<=i; ++j) {
394 const T sum = H(j, k) + v2*H(j, k+1);
395 H(j, k ) -= sum*t1;
396 H(j, k+1) -= sum*t2;
397 }
398
399 if (wantZ) {
400 //
401 // Accumulate transformations in the matrix Z
402 //
403 for (IndexType j=iLoZ; j<=iHiZ; ++j) {
404 const T sum = Z(j, k) + v2*Z(j, k+1);
405 Z(j, k ) -= sum*t1;
406 Z(j, k+1) -= sum*t2;
407 }
408 }
409 }
410 }
411 }
412 //
413 // Failure to converge in remaining number of iterations
414 //
415 if (its>itMax) {
416 return i;
417 }
418
419 if (l==i) {
420 //
421 // H(I,I-1) is negligible: one eigenvalue has converged.
422 //
423 wr(i) = H(i, i);
424 wi(i) = Zero;
425 } else if (l==i-1) {
426 //
427 // H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
428 //
429 // Transform the 2-by-2 submatrix to standard Schur form,
430 // and compute and store the eigenvalues.
431 //
432 T cs, sn;
433 lanv2(H(i-1,i-1), H(i-1,i), H(i,i-1), H(i,i),
434 wr(i-1), wi(i-1), wr(i), wi(i),
435 cs, sn);
436
437 if (wantT) {
438 //
439 // Apply the transformation to the rest of H.
440 //
441 if (i2>i) {
442 const auto cols = _(i+1,i2);
443 blas::rot(H(i-1,cols), H(i,cols), cs, sn);
444 }
445 const auto rows = _(i1,i-2);
446 blas::rot(H(rows, i-1), H(rows, i), cs, sn);
447 }
448 if (wantZ) {
449 //
450 // Apply the transformation to Z.
451 //
452 const auto rows = _(iLoZ, iHiZ);
453 blas::rot(Z(rows, i-1), Z(rows, i), cs, sn);
454 }
455 }
456 //
457 // return to start of the main loop with new value of I.
458 //
459 i = l - 1;
460 }
461 return 0;
462 }
463
464 //== interface for native lapack ===============================================
465
466 #ifdef CHECK_CXXLAPACK
467
468 template <typename IndexType, typename MH, typename VWR, typename VWI,
469 typename MZ>
470 IndexType
471 lahqr_native(bool wantT,
472 bool wantZ,
473 IndexType iLo,
474 IndexType iHi,
475 GeMatrix<MH> &H,
476 DenseVector<VWR> &wr,
477 DenseVector<VWI> &wi,
478 IndexType iLoZ,
479 IndexType iHiZ,
480 GeMatrix<MZ> &Z)
481 {
482 typedef typename GeMatrix<MH>::ElementType T;
483
484 const LOGICAL WANTT = wantT;
485 const LOGICAL WANTZ = wantZ;
486 const INTEGER N = H.numRows();
487 const INTEGER ILO = iLo;
488 const INTEGER IHI = iHi;
489 const INTEGER LDH = H.leadingDimension();
490 const INTEGER ILOZ = iLoZ;
491 const INTEGER IHIZ = iHiZ;
492 const INTEGER LDZ = Z.leadingDimension();
493 INTEGER INFO;
494
495 if (IsSame<T,DOUBLE>::value) {
496 LAPACK_IMPL(dlahqr)(&WANTT,
497 &WANTZ,
498 &N,
499 &ILO,
500 &IHI,
501 H.data(),
502 &LDH,
503 wr.data(),
504 wi.data(),
505 &ILOZ,
506 &IHIZ,
507 Z.data(),
508 &LDZ,
509 &INFO);
510 } else {
511 ASSERT(0);
512 }
513 return INFO;
514 }
515
516 #endif // CHECK_CXXLAPACK
517
518 //== public interface ==========================================================
519
520 template <typename IndexType, typename MH, typename VWR, typename VWI,
521 typename MZ>
522 IndexType
523 lahqr(bool wantT,
524 bool wantZ,
525 IndexType iLo,
526 IndexType iHi,
527 GeMatrix<MH> &H,
528 DenseVector<VWR> &wr,
529 DenseVector<VWI> &wi,
530 IndexType iLoZ,
531 IndexType iHiZ,
532 GeMatrix<MZ> &Z)
533 {
534 LAPACK_DEBUG_OUT("lahqr");
535
536 //
537 // Test the input parameters
538 //
539 using std::max;
540
541 ASSERT(H.firstRow()==1);
542 ASSERT(H.firstCol()==1);
543 ASSERT(H.numRows()==H.numCols());
544 ASSERT(wr.firstIndex()==1);
545 ASSERT(wr.length()==H.numRows());
546 ASSERT(wi.firstIndex()==1);
547 ASSERT(wi.length()==H.numRows());
548 ASSERT(wantZ || (Z.numRows()==H.numCols()));
549 ASSERT(wantZ || (Z.numCols()==H.numCols()));
550
551 // 1 <= ILO <= max(1,IHI); IHI <= N.
552 ASSERT(1<=iLo);
553 ASSERT(iLo<=max(IndexType(1), iHi));
554 ASSERT(iHi<=H.numRows());
555
556 // 1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
557 ASSERT(1<=iLoZ);
558 ASSERT(iLoZ<=iLo);
559 ASSERT(iHi<=iHiZ);
560 ASSERT(iHiZ<=H.numRows());
561
562 //
563 // Make copies of output arguments
564 //
565 # ifdef CHECK_CXXLAPACK
566 typename GeMatrix<MH>::NoView H_org = H;
567 typename GeMatrix<MH>::NoView Z_org = Z;
568
569 typename GeMatrix<MH>::NoView _H = H;
570 typename DenseVector<VWR>::NoView _wr = wr;
571 typename DenseVector<VWI>::NoView _wi = wi;
572 typename GeMatrix<MZ>::NoView _Z = Z;
573 # endif
574
575 //
576 // Call implementation
577 //
578 IndexType info = lahqr_generic(wantT, wantZ, iLo, iHi,
579 H, wr, wi, iLoZ, iHiZ, Z);
580
581 //
582 // Compare results
583 //
584 # ifdef CHECK_CXXLAPACK
585 IndexType _info = lahqr_native(wantT, wantZ, iLo, iHi,
586 _H, _wr, _wi, iLoZ, iHiZ, _Z);
587
588 bool failed = false;
589 if (! isIdentical(H, _H, " H", "_H")) {
590 std::cerr << "CXXLAPACK: H = " << H << std::endl;
591 std::cerr << "F77LAPACK: _H = " << _H << std::endl;
592 failed = true;
593 }
594
595 if (! isIdentical(wr, _wr, " wr", "_wr")) {
596 std::cerr << "CXXLAPACK: wr = " << wr << std::endl;
597 std::cerr << "F77LAPACK: _wr = " << _wr << std::endl;
598 failed = true;
599 }
600
601 if (! isIdentical(wi, _wi, " wi", "_wi")) {
602 std::cerr << "CXXLAPACK: wi = " << wi << std::endl;
603 std::cerr << "F77LAPACK: _wi = " << _wi << std::endl;
604 failed = true;
605 }
606
607 if (! isIdentical(Z, _Z, " Z", "_Z")) {
608 std::cerr << "CXXLAPACK: Z = " << Z << std::endl;
609 std::cerr << "F77LAPACK: _Z = " << _Z << std::endl;
610 failed = true;
611 }
612
613 if (! isIdentical(info, _info, " info", "_info")) {
614 std::cerr << "CXXLAPACK: info = " << info << std::endl;
615 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
616 failed = true;
617 }
618
619 if (failed) {
620 std::cerr << "H_org = " << H_org << std::endl;
621 std::cerr << "Z_org = " << Z_org << std::endl;
622 std::cerr << "wantT = " << wantT
623 << ", wantZ = " << wantZ
624 << ", iLo = " << iLo
625 << ", iHi = " << iHi
626 << std::endl;
627 std::cerr << "error in: lahqr.tcc" << std::endl;
628 ASSERT(0);
629 } else {
630 // std::cerr << "passed: lahqr.tcc" << std::endl;
631 }
632 # endif
633 return info;
634 }
635
636 //-- forwarding ----------------------------------------------------------------
637 template <typename IndexType, typename MH, typename VWR, typename VWI,
638 typename MZ>
639 IndexType
640 lahqr(bool wantT,
641 bool wantZ,
642 IndexType iLo,
643 IndexType iHi,
644 MH &&H,
645 VWR &&wr,
646 VWI &&wi,
647 IndexType iLoZ,
648 IndexType iHiZ,
649 MZ &&Z)
650 {
651 return lahqr(wantT, wantZ, iLo, iHi, H, wr, wi, iLoZ, iHiZ, Z);
652 }
653
654 } } // namespace lapack, flens
655
656 #endif // FLENS_LAPACK_EIG_LAHQR_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 SUBROUTINE DLAHQR( WANTT, WANTZ, N, ILO, IHI, H, LDH, WR, WI,
35 $ ILOZ, IHIZ, Z, LDZ, INFO )
36 *
37 * -- LAPACK auxiliary routine (version 3.2) --
38 * Univ. of Tennessee, Univ. of California Berkeley,
39 * Univ. of Colorado Denver and NAG Ltd..
40 * November 2006
41 */
42
43 #ifndef FLENS_LAPACK_EIG_LAHQR_TCC
44 #define FLENS_LAPACK_EIG_LAHQR_TCC 1
45
46 #include <cmath>
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 namespace flens { namespace lapack {
51
52 //== generic lapack implementation =============================================
53
54 template <typename IndexType, typename MH, typename VWR, typename VWI,
55 typename MZ>
56 IndexType
57 lahqr_generic(bool wantT,
58 bool wantZ,
59 IndexType iLo,
60 IndexType iHi,
61 GeMatrix<MH> &H,
62 DenseVector<VWR> &wr,
63 DenseVector<VWI> &wi,
64 IndexType iLoZ,
65 IndexType iHiZ,
66 GeMatrix<MZ> &Z)
67 {
68 using std::abs;
69 using std::max;
70 using std::min;
71
72 typedef typename GeMatrix<MH>::ElementType T;
73
74 const Underscore<IndexType> _;
75 const T Zero(0), One(1), Two(2);
76 const T Dat1 = T(3)/T(4),
77 Dat2 = T(-0.4375);
78 const IndexType itMax = 30;
79 const IndexType n = H.numRows();
80
81 typedef typename GeMatrix<MH>::VectorView VectorView;
82 T vBuffer[3];
83 VectorView v = typename VectorView::Engine(3, vBuffer);
84
85 //
86 // Quick return if possible
87 //
88 if (n==0) {
89 return 0;
90 }
91 if (iLo==iHi) {
92 wr(iLo) = H(iLo, iLo);
93 wi(iLo) = Zero;
94 return 0;
95 }
96 //
97 // ==== clear out the trash ====
98 //
99 for (IndexType j=iLo; j<=iHi-3; ++j) {
100 H(j+2, j) = Zero;
101 H(j+3, j) = Zero;
102 }
103 if (iLo<=iHi-2) {
104 H(iHi, iHi-2) = Zero;
105 }
106
107 const IndexType nh = iHi - iLo + 1;
108 //
109 // Set machine-dependent constants for the stopping criterion.
110 //
111 T safeMin = lamch<T>(SafeMin);
112 T safeMax = One / safeMin;
113 labad(safeMin, safeMax);
114 const T ulp = lamch<T>(Precision);
115 const T smallNum = safeMin*(T(nh)/ulp);
116 //
117 // i1 and i2 are the indices of the first row and last column of H
118 // to which transformations must be applied. If eigenvalues only are
119 // being computed, i1 and i2 are set inside the main loop.
120 //
121 IndexType i1 = -1, i2 = -1;
122 if (wantT) {
123 i1 = 1;
124 i2 = n;
125 }
126 //
127 // The main loop begins here. Variable i is the loop index and decreases from
128 // iHi to iLo in steps of 1 or 2. Each iteration of the loop works
129 // with the active submatrix in rows and columns l to i.
130 // Eigenvalues i+1 to iHi have already converged. Either l = iLo or
131 // H(l,l-1) is negligible so that the matrix splits.
132 //
133 IndexType i = iHi;
134 while (true) {
135 IndexType l = iLo;
136 if (i<iLo) {
137 break;
138 }
139 //
140 // Perform QR iterations on rows and columns iLo to i until a
141 // submatrix of order 1 or 2 splits off at the bottom because a
142 // subdiagonal element has become negligible.
143 //
144 IndexType its;
145 for (its=0; its<=itMax; ++its) {
146 //
147 // Look for a single small subdiagonal element.
148 //
149 IndexType k;
150 for (k=i; k>=l+1; --k) {
151 if (abs(H(k,k-1))<=smallNum) {
152 break;
153 }
154 T test = abs(H(k-1,k-1)) + abs(H(k,k));
155 if (test==Zero) {
156 if (k-2>=iLo) {
157 test += abs(H(k-1,k-2));
158 }
159 if (k+1<=iHi) {
160 test += abs(H(k+1,k));
161 }
162 }
163 // ==== The following is a conservative small subdiagonal
164 // . deflation criterion due to Ahues & Tisseur (LAWN 122,
165 // . 1997). It has better mathematical foundation and
166 // . improves accuracy in some cases. ====
167 if (abs(H(k,k-1))<=ulp*test) {
168 const T ab = max(abs(H(k, k-1)), abs(H(k-1, k)));
169 const T ba = min(abs(H(k, k-1)), abs(H(k-1, k)));
170 const T aa = max(abs(H(k, k)), abs(H(k-1, k-1)-H(k, k)));
171 const T bb = min(abs(H(k, k)), abs(H(k-1, k-1)-H(k, k)));
172 const T s = aa + ab;
173 if (ba*(ab/s)<=max(smallNum, ulp*(bb*(aa/s)))) {
174 break;
175 }
176 }
177 }
178 l = k;
179
180 if (l>iLo) {
181 //
182 // H(l,l-1) is negligible
183 //
184 H(l, l-1) = Zero;
185 }
186 //
187 // Exit from loop if a submatrix of order 1 or 2 has split off.
188 //
189 if (l>=i-1) {
190 break;
191 }
192 //
193 // Now the active submatrix is in rows and columns l to i. If
194 // eigenvalues only are being computed, only the active submatrix
195 // need be transformed.
196 //
197 if (!wantT) {
198 i1 = l;
199 i2 = i;
200 }
201
202 T H11, H12, H21, H22;
203 T rt1r, rt2r, rt1i, rt2i;
204 if (its==10) {
205 //
206 // Exceptional shift.
207 //
208 const T s = abs(H(l+1,l)) + abs(H(l+2, l+1));
209 H11 = Dat1*s + H(l,l);
210 H12 = Dat2*s;
211 H21 = s;
212 H22 = H11;
213 } else if (its==20) {
214 //
215 // Exceptional shift.
216 //
217 const T s = abs(H(i,i-1)) + abs(H(i-1,i-2));
218 H11 = Dat1*s + H(i,i);
219 H12 = Dat2*s;
220 H21 = s;
221 H22 = H11;
222 } else {
223 //
224 // Prepare to use Francis' double shift
225 // (i.e. 2nd degree generalized Rayleigh quotient)
226 //
227 H11 = H(i-1, i-1);
228 H21 = H( i, i-1);
229 H12 = H(i-1, i);
230 H22 = H( i, i);
231 }
232
233 const T s = abs(H11) + abs(H12) + abs(H21) + abs(H22);
234 if (s==Zero) {
235 rt1r = Zero;
236 rt1i = Zero;
237 rt2r = Zero;
238 rt2i = Zero;
239 } else {
240 H11 /= s;
241 H21 /= s;
242 H12 /= s;
243 H22 /= s;
244 const T tr = (H11+H22) / Two;
245 const T det = (H11-tr)*(H22-tr) - H12*H21;
246 const T rtDisc = sqrt(abs(det));
247 if (det>=Zero) {
248 //
249 // ==== complex conjugate shifts ====
250 //
251 rt1r = tr*s;
252 rt2r = rt1r;
253 rt1i = rtDisc*s;
254 rt2i = -rt1i;
255 } else {
256 //
257 // ==== real shifts (use only one of them) ====
258 //
259 rt1r = tr + rtDisc;
260 rt2r = tr - rtDisc;
261 if (abs(rt1r-H22)<=abs(rt2r-H22)) {
262 rt1r *= s;
263 rt2r = rt1r;
264 } else {
265 rt2r *= s;
266 rt1r = rt2r;
267 }
268 rt1i = Zero;
269 rt2i = Zero;
270 }
271 }
272 //
273 // Look for two consecutive small subdiagonal elements.
274 //
275 IndexType m;
276 for (m=i-2; m>=l; --m) {
277 // Determine the effect of starting the double-shift QR
278 // iteration at row m, and see if this would make H(m,m-1)
279 // negligible. (The following uses scaling to avoid
280 // overflows and most underflows.)
281 //
282 T H21S = H(m+1,m);
283 T s = abs(H(m,m)-rt2r) + abs(rt2i) + abs(H21S);
284
285 H21S = H(m+1,m)/s;
286 v(1) = H21S*H(m,m+1)
287 + (H(m,m)-rt1r)*((H(m,m)-rt2r)/s)
288 - rt1i*(rt2i/s);
289 v(2) = H21S*(H(m,m) + H(m+1,m+1)-rt1r-rt2r);
290 v(3) = H21S*H(m+2,m+1);
291
292 s = abs(v(1)) + abs(v(2)) + abs(v(3));
293 v(1) /= s;
294 v(2) /= s;
295 v(3) /= s;
296
297 if (m==l) {
298 break;
299 }
300
301 const T value1 = abs(H(m,m-1))*(abs(v(2))+abs(v(3)));
302 const T value2 = ulp*abs(v(1))
303 *(abs(H(m-1,m-1))+abs(H(m,m))+abs(H(m+1,m+1)));
304 if (value1<=value2) {
305 break;
306 }
307 }
308 //
309 // Double-shift QR step
310 //
311 for (k=m; k<=i-1; ++k) {
312 //
313 // The first iteration of this loop determines a reflection G
314 // from the vector v and applies it from left and right to H,
315 // thus creating a nonzero bulge below the subdiagonal.
316 //
317 // Each subsequent iteration determines a reflection G to
318 // restore the Hessenberg form in the (k-1)th column, and thus
319 // chases the bulge one step toward the bottom of the active
320 // submatrix. 'nr' is the order of G.
321 //
322 T t1, t2, t3, v2, v3;
323
324 IndexType nr = min(IndexType(3), i-k+1);
325 if (k>m) {
326 v(_(1,nr)) = H(_(k,k+nr-1),k-1);
327 }
328 larfg(nr, v(1), v(_(2,nr)), t1);
329 if (k>m) {
330 H(k, k-1) = v(1);
331 H(k+1,k-1) = Zero;
332 if (k<i-1) {
333 H(k+2,k-1) = Zero;
334 }
335 } else if (m>l) {
336 // ==== Use the following instead of
337 // . H( K, K-1 ) = -H( K, K-1 ) to
338 // . avoid a bug when v(2) and v(3)
339 // . underflow. ====
340 H(k, k-1) *= (One-t1);
341 }
342 v2 = v(2);
343 t2 = t1*v2;
344 if (nr==3) {
345 v3 = v(3);
346 t3 = t1*v3;
347 //
348 // Apply G from the left to transform the rows of the matrix
349 // in columns k to i2.
350 //
351 for (IndexType j=k; j<=i2; ++j) {
352 const T sum = H(k,j) + v2*H(k+1,j) + v3*H(k+2,j);
353 H(k, j) -= sum*t1;
354 H(k+1, j) -= sum*t2;
355 H(k+2, j) -= sum*t3;
356 }
357 //
358 // Apply G from the right to transform the columns of the
359 // matrix in rows i1 to min(k+3,i).
360 //
361 for (IndexType j=i1; j<=min(k+3,i); ++j) {
362 const T sum = H(j, k) + v2*H(j,k+1) + v3*H(j,k+2);
363 H(j, k ) -= sum*t1;
364 H(j, k+1) -= sum*t2;
365 H(j, k+2) -= sum*t3;
366 }
367
368 if (wantZ) {
369 //
370 // Accumulate transformations in the matrix Z
371 //
372 for (IndexType j=iLoZ; j<=iHiZ; ++j) {
373 const T sum = Z(j, k) + v2*Z(j, k+1) + v3*Z(j, k+2);
374 Z(j, k ) -= sum*t1;
375 Z(j, k+1) -= sum*t2;
376 Z(j, k+2) -= sum*t3;
377 }
378 }
379 } else if (nr==2) {
380 //
381 // Apply G from the left to transform the rows of the matrix
382 // in columns K to I2.
383 //
384 for (IndexType j=k; j<=i2; ++j) {
385 const T sum = H(k, j) + v2*H(k+1, j);
386 H(k, j) -= sum*t1;
387 H(k+1, j) -= sum*t2;
388 }
389 //
390 // Apply G from the right to transform the columns of the
391 // matrix in rows i1 to min(k+3,i).
392 //
393 for (IndexType j=i1; j<=i; ++j) {
394 const T sum = H(j, k) + v2*H(j, k+1);
395 H(j, k ) -= sum*t1;
396 H(j, k+1) -= sum*t2;
397 }
398
399 if (wantZ) {
400 //
401 // Accumulate transformations in the matrix Z
402 //
403 for (IndexType j=iLoZ; j<=iHiZ; ++j) {
404 const T sum = Z(j, k) + v2*Z(j, k+1);
405 Z(j, k ) -= sum*t1;
406 Z(j, k+1) -= sum*t2;
407 }
408 }
409 }
410 }
411 }
412 //
413 // Failure to converge in remaining number of iterations
414 //
415 if (its>itMax) {
416 return i;
417 }
418
419 if (l==i) {
420 //
421 // H(I,I-1) is negligible: one eigenvalue has converged.
422 //
423 wr(i) = H(i, i);
424 wi(i) = Zero;
425 } else if (l==i-1) {
426 //
427 // H(I-1,I-2) is negligible: a pair of eigenvalues have converged.
428 //
429 // Transform the 2-by-2 submatrix to standard Schur form,
430 // and compute and store the eigenvalues.
431 //
432 T cs, sn;
433 lanv2(H(i-1,i-1), H(i-1,i), H(i,i-1), H(i,i),
434 wr(i-1), wi(i-1), wr(i), wi(i),
435 cs, sn);
436
437 if (wantT) {
438 //
439 // Apply the transformation to the rest of H.
440 //
441 if (i2>i) {
442 const auto cols = _(i+1,i2);
443 blas::rot(H(i-1,cols), H(i,cols), cs, sn);
444 }
445 const auto rows = _(i1,i-2);
446 blas::rot(H(rows, i-1), H(rows, i), cs, sn);
447 }
448 if (wantZ) {
449 //
450 // Apply the transformation to Z.
451 //
452 const auto rows = _(iLoZ, iHiZ);
453 blas::rot(Z(rows, i-1), Z(rows, i), cs, sn);
454 }
455 }
456 //
457 // return to start of the main loop with new value of I.
458 //
459 i = l - 1;
460 }
461 return 0;
462 }
463
464 //== interface for native lapack ===============================================
465
466 #ifdef CHECK_CXXLAPACK
467
468 template <typename IndexType, typename MH, typename VWR, typename VWI,
469 typename MZ>
470 IndexType
471 lahqr_native(bool wantT,
472 bool wantZ,
473 IndexType iLo,
474 IndexType iHi,
475 GeMatrix<MH> &H,
476 DenseVector<VWR> &wr,
477 DenseVector<VWI> &wi,
478 IndexType iLoZ,
479 IndexType iHiZ,
480 GeMatrix<MZ> &Z)
481 {
482 typedef typename GeMatrix<MH>::ElementType T;
483
484 const LOGICAL WANTT = wantT;
485 const LOGICAL WANTZ = wantZ;
486 const INTEGER N = H.numRows();
487 const INTEGER ILO = iLo;
488 const INTEGER IHI = iHi;
489 const INTEGER LDH = H.leadingDimension();
490 const INTEGER ILOZ = iLoZ;
491 const INTEGER IHIZ = iHiZ;
492 const INTEGER LDZ = Z.leadingDimension();
493 INTEGER INFO;
494
495 if (IsSame<T,DOUBLE>::value) {
496 LAPACK_IMPL(dlahqr)(&WANTT,
497 &WANTZ,
498 &N,
499 &ILO,
500 &IHI,
501 H.data(),
502 &LDH,
503 wr.data(),
504 wi.data(),
505 &ILOZ,
506 &IHIZ,
507 Z.data(),
508 &LDZ,
509 &INFO);
510 } else {
511 ASSERT(0);
512 }
513 return INFO;
514 }
515
516 #endif // CHECK_CXXLAPACK
517
518 //== public interface ==========================================================
519
520 template <typename IndexType, typename MH, typename VWR, typename VWI,
521 typename MZ>
522 IndexType
523 lahqr(bool wantT,
524 bool wantZ,
525 IndexType iLo,
526 IndexType iHi,
527 GeMatrix<MH> &H,
528 DenseVector<VWR> &wr,
529 DenseVector<VWI> &wi,
530 IndexType iLoZ,
531 IndexType iHiZ,
532 GeMatrix<MZ> &Z)
533 {
534 LAPACK_DEBUG_OUT("lahqr");
535
536 //
537 // Test the input parameters
538 //
539 using std::max;
540
541 ASSERT(H.firstRow()==1);
542 ASSERT(H.firstCol()==1);
543 ASSERT(H.numRows()==H.numCols());
544 ASSERT(wr.firstIndex()==1);
545 ASSERT(wr.length()==H.numRows());
546 ASSERT(wi.firstIndex()==1);
547 ASSERT(wi.length()==H.numRows());
548 ASSERT(wantZ || (Z.numRows()==H.numCols()));
549 ASSERT(wantZ || (Z.numCols()==H.numCols()));
550
551 // 1 <= ILO <= max(1,IHI); IHI <= N.
552 ASSERT(1<=iLo);
553 ASSERT(iLo<=max(IndexType(1), iHi));
554 ASSERT(iHi<=H.numRows());
555
556 // 1 <= ILOZ <= ILO; IHI <= IHIZ <= N.
557 ASSERT(1<=iLoZ);
558 ASSERT(iLoZ<=iLo);
559 ASSERT(iHi<=iHiZ);
560 ASSERT(iHiZ<=H.numRows());
561
562 //
563 // Make copies of output arguments
564 //
565 # ifdef CHECK_CXXLAPACK
566 typename GeMatrix<MH>::NoView H_org = H;
567 typename GeMatrix<MH>::NoView Z_org = Z;
568
569 typename GeMatrix<MH>::NoView _H = H;
570 typename DenseVector<VWR>::NoView _wr = wr;
571 typename DenseVector<VWI>::NoView _wi = wi;
572 typename GeMatrix<MZ>::NoView _Z = Z;
573 # endif
574
575 //
576 // Call implementation
577 //
578 IndexType info = lahqr_generic(wantT, wantZ, iLo, iHi,
579 H, wr, wi, iLoZ, iHiZ, Z);
580
581 //
582 // Compare results
583 //
584 # ifdef CHECK_CXXLAPACK
585 IndexType _info = lahqr_native(wantT, wantZ, iLo, iHi,
586 _H, _wr, _wi, iLoZ, iHiZ, _Z);
587
588 bool failed = false;
589 if (! isIdentical(H, _H, " H", "_H")) {
590 std::cerr << "CXXLAPACK: H = " << H << std::endl;
591 std::cerr << "F77LAPACK: _H = " << _H << std::endl;
592 failed = true;
593 }
594
595 if (! isIdentical(wr, _wr, " wr", "_wr")) {
596 std::cerr << "CXXLAPACK: wr = " << wr << std::endl;
597 std::cerr << "F77LAPACK: _wr = " << _wr << std::endl;
598 failed = true;
599 }
600
601 if (! isIdentical(wi, _wi, " wi", "_wi")) {
602 std::cerr << "CXXLAPACK: wi = " << wi << std::endl;
603 std::cerr << "F77LAPACK: _wi = " << _wi << std::endl;
604 failed = true;
605 }
606
607 if (! isIdentical(Z, _Z, " Z", "_Z")) {
608 std::cerr << "CXXLAPACK: Z = " << Z << std::endl;
609 std::cerr << "F77LAPACK: _Z = " << _Z << std::endl;
610 failed = true;
611 }
612
613 if (! isIdentical(info, _info, " info", "_info")) {
614 std::cerr << "CXXLAPACK: info = " << info << std::endl;
615 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
616 failed = true;
617 }
618
619 if (failed) {
620 std::cerr << "H_org = " << H_org << std::endl;
621 std::cerr << "Z_org = " << Z_org << std::endl;
622 std::cerr << "wantT = " << wantT
623 << ", wantZ = " << wantZ
624 << ", iLo = " << iLo
625 << ", iHi = " << iHi
626 << std::endl;
627 std::cerr << "error in: lahqr.tcc" << std::endl;
628 ASSERT(0);
629 } else {
630 // std::cerr << "passed: lahqr.tcc" << std::endl;
631 }
632 # endif
633 return info;
634 }
635
636 //-- forwarding ----------------------------------------------------------------
637 template <typename IndexType, typename MH, typename VWR, typename VWI,
638 typename MZ>
639 IndexType
640 lahqr(bool wantT,
641 bool wantZ,
642 IndexType iLo,
643 IndexType iHi,
644 MH &&H,
645 VWR &&wr,
646 VWI &&wi,
647 IndexType iLoZ,
648 IndexType iHiZ,
649 MZ &&Z)
650 {
651 return lahqr(wantT, wantZ, iLo, iHi, H, wr, wi, iLoZ, iHiZ, Z);
652 }
653
654 } } // namespace lapack, flens
655
656 #endif // FLENS_LAPACK_EIG_LAHQR_TCC