1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI,
36 $ VS, LDVS, WORK, LWORK, BWORK, INFO )
37 *
38 * -- LAPACK driver routine (version 3.2) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * November 2006
42 *
43 */
44
45 #ifndef FLENS_LAPACK_EIG_ES_TCC
46 #define FLENS_LAPACK_EIG_ES_TCC 1
47
48 #include <cmath>
49 #include <flens/blas/blas.h>
50 #include <flens/lapack/lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename MA>
57 Pair<typename GeMatrix<MA>::IndexType>
58 es_generic_wsq(bool computeSchurVectors,
59 const GeMatrix<MA> &A)
60 {
61 using std::max;
62
63 typedef typename GeMatrix<MA>::ElementType T;
64 typedef typename GeMatrix<MA>::IndexType IndexType;
65
66 const IndexType n = A.numRows();
67
68 IndexType minWork, maxWork;
69
70 if (n==0) {
71 minWork = 1;
72 maxWork = 1;
73 } else {
74 maxWork = 2*n + n*ilaenv<T>(1, "GEHRD", "", n, 1, n, 0);
75 minWork = 3*n;
76
77 HSEQR::ComputeZ computeZ = (computeSchurVectors) ? HSEQR::NoInit
78 : HSEQR::No;
79 IndexType hsWork = hseqr_wsq(HSEQR::Schur, computeZ,
80 IndexType(1), n, A);
81
82 if (!computeSchurVectors) {
83 maxWork = max(maxWork, n + hsWork);
84 } else {
85 maxWork = max(maxWork,
86 2*n + (n-1)*ilaenv<T>(1, "ORGHR", "", n, 1, n));
87 maxWork = max(maxWork, n +hsWork);
88 }
89 }
90 return Pair<IndexType>(minWork, maxWork);
91 }
92
93 template <typename SelectFunction, typename MA, typename IndexType,
94 typename VWR, typename VWI, typename MVS,
95 typename VWORK, typename BWORK>
96 IndexType
97 es_generic(bool computeSchurVectors,
98 bool sortEigenvalues,
99 SelectFunction selectFunction,
100 GeMatrix<MA> &A,
101 IndexType &sDim,
102 DenseVector<VWR> &wr,
103 DenseVector<VWI> &wi,
104 GeMatrix<MVS> &VS,
105 DenseVector<VWORK> &work,
106 DenseVector<BWORK> &bWork)
107 {
108 using std::sqrt;
109
110 typedef typename GeMatrix<MA>::ElementType T;
111
112 const Underscore<IndexType> _;
113 const IndexType n = A.numRows();
114 const T Zero(0), One(1);
115
116 IndexType info = 0;
117 //
118 // Compute workspace
119 // (Note: Comments in the code beginning "Workspace:" describe the
120 // minimal amount of workspace needed at that point in the code,
121 // as well as the preferred amount for good performance.
122 // NB refers to the optimal block size for the immediately
123 // following subroutine, as returned by ILAENV.
124 // HSWORK refers to the workspace preferred by DHSEQR, as
125 // calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
126 // the worst case.)
127 //
128 Pair<IndexType> wsQuery = es_wsq(computeSchurVectors, A);
129 IndexType minWork = wsQuery.first;
130 IndexType maxWork = wsQuery.second;
131
132 if (work.length()!=0 && work.length()<minWork) {
133 ASSERT(0);
134 } else if (work.length()==0) {
135 work.resize(maxWork);
136 }
137
138 work(1) = maxWork;
139
140 //
141 // Quick return if possible
142 //
143 if (n==0) {
144 sDim = 0;
145 return info;
146 }
147 //
148 // Get machine constants
149 //
150 const T eps = lamch<T>(Precision);
151 T smallNum = lamch<T>(SafeMin);
152 T bigNum = One / smallNum;
153 labad(smallNum, bigNum);
154
155 smallNum = sqrt(smallNum) / eps;
156 bigNum = One / smallNum;
157 //
158 // Scale A if max element outside range [SMLNUM,BIGNUM]
159 //
160 const T normA = lan(MaximumNorm, A);
161
162 bool scaleA = false;
163 T cScale;
164
165 if (normA>Zero && normA<smallNum) {
166 scaleA = true;
167 cScale = smallNum;
168 } else if (normA>bigNum) {
169 scaleA = true;
170 cScale = bigNum;
171 }
172 if (scaleA) {
173 lascl(LASCL::FullMatrix, 0, 0, normA, cScale, A);
174 }
175 //
176 // Permute the matrix to make it more nearly triangular
177 // (Workspace: need N)
178 //
179 IndexType iBal = 1;
180 auto balWork = work(_(iBal,iBal+n-1));
181
182 IndexType iLo, iHi;
183 bal(BALANCE::PermuteOnly, A, iLo, iHi, balWork);
184 //
185 // Reduce to upper Hessenberg form
186 // (Workspace: need 3*N, prefer 2*N+N*NB)
187 //
188 IndexType iTau = iBal + n;
189 IndexType iWork = iTau + n;
190 IndexType lWork = work.length();
191
192 auto tau = work(_(iTau, iTau+n-2));
193 auto hrdWork = work(_(iWork, lWork));
194
195 hrd(iLo, iHi, A, tau, hrdWork);
196
197 if (computeSchurVectors) {
198 //
199 // Copy Householder vectors to VS
200 //
201 VS.lower() = A.lower();
202 //
203 // Generate orthogonal matrix in VS
204 // (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
205 //
206 orghr(iLo, iHi, VS, tau, hrdWork);
207 }
208
209 sDim = 0;
210 //
211 // Perform QR iteration, accumulating Schur vectors in VS if desired
212 // (Workspace: need N+1, prefer N+HSWORK (see comments) )
213 //
214 iWork = iTau;
215 auto hseqrWork = work(_(iWork, lWork));
216 HSEQR::ComputeZ computeZ = (computeSchurVectors) ? HSEQR::NoInit
217 : HSEQR::No;
218 IndexType iEval = hseqr(HSEQR::Schur, computeZ, iLo, iHi,
219 A, wr, wi, VS, hseqrWork);
220 if (iEval>0) {
221 info = iEval;
222 }
223 //
224 // Sort eigenvalues if desired
225 //
226 if (sortEigenvalues && info==0) {
227 if (scaleA) {
228 lascl(LASCL::FullMatrix, 0, 0, cScale, normA, wr);
229 lascl(LASCL::FullMatrix, 0, 0, cScale, normA, wi);
230 }
231 for (IndexType i=1; i<=n; ++i) {
232 bWork(i) = selectFunction(wr(i), wi(i));
233 }
234 //
235 // Reorder eigenvalues and transform Schur vectors
236 // (Workspace: none needed)
237 //
238 T sep, s;
239 // TODO: I dislike that a dummy vector is needed
240 DenseVector<Array<int> > dummy(1);
241 IndexType iCond = trsen(TRSEN::None, computeSchurVectors, bWork,
242 A, VS, wr, wi, sDim, s, sep,
243 hseqrWork, dummy);
244 if (iCond>0) {
245 info = n + iCond;
246 }
247 }
248
249 if (computeSchurVectors) {
250 //
251 // Undo balancing
252 // (Workspace: need N)
253 //
254 bak(BALANCE::PermuteOnly, Right, iLo, iHi, balWork, VS);
255 }
256
257 if (scaleA) {
258 //
259 // Undo scaling for the Schur form of A
260 //
261 lascl(LASCL::UpperHessenberg, 0, 0, cScale, normA, A);
262 wr = A.diag(0);
263 if (cScale==smallNum) {
264 //
265 // If scaling back towards underflow, adjust WI if an
266 // offdiagonal element of a 2-by-2 block in the Schur form
267 // underflows.
268 //
269 IndexType i1, i2;
270 if (iEval>0) {
271 i1 = iEval + 1;
272 i2 = iHi - 1;
273 lascl(LASCL::FullMatrix, 0, 0, cScale, normA, wi(_(1,iLo-1)));
274 } else if (sortEigenvalues) {
275 i1 = 1;
276 i2 = n - 1;
277 } else {
278 i1 = iLo;
279 i2 = iHi - 1;
280 }
281 IndexType iNext = i1-1;
282 for (IndexType i=i1; i<=i2; ++i) {
283 if (i<iNext) {
284 continue;
285 }
286 if (wi(i)==Zero) {
287 iNext = i+1;
288 } else {
289 if (A(i+1,i)==Zero) {
290 wi(i) = Zero;
291 wi(i+1) = Zero;
292 } else if (A(i+1,i)!=Zero && A(i,i+1)==Zero) {
293 wi(i) = Zero;
294 wi(i+1) = Zero;
295 if (i>1) {
296 blas::swap(A(_(1,i-1),i), A(_(1,i-1),i+1));
297 }
298 if (n>i+1) {
299 blas::swap(A(i,_(i+2,n)), A(i+1,_(i+2,n)));
300 }
301 if (computeSchurVectors) {
302 blas::swap(VS(_,i), VS(_,i+1));
303 }
304 A(i,i+1) = A(i+1,i);
305 A(i+1,i) = Zero;
306 }
307 iNext = i + 2;
308 }
309 }
310 }
311 //
312 // Undo scaling for the imaginary part of the eigenvalues
313 //
314 lascl(LASCL::FullMatrix, 0, 0, cScale, normA, wi(_(iEval+1,n)));
315 }
316
317 if (sortEigenvalues && info==0) {
318 //
319 // Check if reordering successful
320 //
321 bool lastSelect = true;
322 bool last2Select = true;
323 sDim = 0;
324 IndexType ip = 0;
325 for (IndexType i=1; i<=n; ++i) {
326 bool currentSelect = selectFunction(wr(i), wi(i));
327 if (wi(i)==Zero) {
328 if (currentSelect) {
329 ++sDim;
330 }
331 ip = 0;
332 if (currentSelect && !lastSelect) {
333 info = n + 2;
334 }
335 } else {
336 if (ip==1) {
337 //
338 // Last eigenvalue of conjugate pair
339 //
340 currentSelect = currentSelect || lastSelect;
341 lastSelect = currentSelect;
342 if (currentSelect) {
343 sDim += 2;
344 }
345 ip = -1;
346 if (currentSelect && !last2Select) {
347 info = n + 2;
348 }
349 } else {
350 //
351 // First eigenvalue of conjugate pair
352 //
353 ip = 1;
354 }
355 }
356 last2Select = lastSelect;
357 lastSelect = currentSelect;
358 }
359 }
360
361 work(1) = maxWork;
362 return info;
363 }
364
365 //== interface for native lapack ===============================================
366
367 #ifdef CHECK_CXXLAPACK
368
369 template <typename MA>
370 typename GeMatrix<MA>::IndexType
371 es_native_wsq(bool computeSchurVectors,
372 const GeMatrix<MA> &A)
373 {
374 typedef typename GeMatrix<MA>::ElementType T;
375
376 const char JOBVS = (computeSchurVectors) ? 'V' : 'N';
377 const char SORT = 'N';
378 LOGICAL (*SELECT)(const T*, const T*) = 0;
379 const INTEGER N = A.numRows();
380 const INTEGER LDA = A.leadingDimension();
381 INTEGER SDIM = 0;
382 const INTEGER LDVS = LDA;
383 T DUMMY = 0;
384 T WORK = 0;
385 const INTEGER LWORK = -1;
386 LOGICAL BWORK;
387 INTEGER INFO;
388
389 if (IsSame<T,DOUBLE>::value) {
390 LAPACK_IMPL(dgees)(&JOBVS,
391 &SORT,
392 SELECT,
393 &N,
394 &DUMMY,
395 &LDA,
396 &SDIM,
397 &DUMMY,
398 &DUMMY,
399 &DUMMY,
400 &LDVS,
401 &WORK,
402 &LWORK,
403 &BWORK,
404 &INFO);
405 } else {
406 ASSERT(0);
407 }
408 ASSERT(INFO>=0);
409 return WORK;
410 }
411
412 template <typename SelectFunction, typename MA, typename IndexType,
413 typename VWR, typename VWI, typename MVS,
414 typename VWORK, typename BWORK>
415 IndexType
416 es_native(bool computeSchurVectors,
417 bool sortEigenvalues,
418 SelectFunction selectFunction,
419 GeMatrix<MA> &A,
420 IndexType &sDim,
421 DenseVector<VWR> &wr,
422 DenseVector<VWI> &wi,
423 GeMatrix<MVS> &VS,
424 DenseVector<VWORK> &work,
425 DenseVector<BWORK> &bWork)
426 {
427 typedef typename GeMatrix<MA>::ElementType T;
428
429 const char JOBVS = (computeSchurVectors) ? 'V' : 'N';
430 const char SORT = (sortEigenvalues) ? 'S' : 'N';
431 LOGICAL (*SELECT)(const T*, const T*) = selectFunction.select;
432 const INTEGER N = A.numRows();
433 const INTEGER LDA = A.leadingDimension();
434 INTEGER SDIM = sDim;
435 const INTEGER LDVS = VS.leadingDimension();
436 const INTEGER LWORK = work.length();
437 INTEGER INFO;
438
439 DenseVector<Array<LOGICAL> > _bWork(N);
440 for (IndexType i=1; i<=N; ++i) {
441 _bWork(i) = bWork(i);
442 }
443
444 if (IsSame<T,DOUBLE>::value) {
445 LAPACK_IMPL(dgees)(&JOBVS,
446 &SORT,
447 SELECT,
448 &N,
449 A.data(),
450 &LDA,
451 &SDIM,
452 wr.data(),
453 wi.data(),
454 VS.data(),
455 &LDVS,
456 work.data(),
457 &LWORK,
458 _bWork.data(),
459 &INFO);
460 } else {
461 ASSERT(0);
462 }
463 ASSERT(INFO>=0);
464
465 sDim = SDIM;
466 for (IndexType i=1; i<=N; ++i) {
467 bWork(i) = _bWork(i);
468 }
469
470 return INFO;
471 }
472
473 #endif // CHECK_CXXLAPACK
474
475 //== public interface ==========================================================
476
477 template <typename MA>
478 Pair<typename GeMatrix<MA>::IndexType>
479 es_wsq(bool computeSchurVectors,
480 const GeMatrix<MA> &A)
481 {
482 LAPACK_DEBUG_OUT("es_wsq");
483
484 //
485 // Test the input parameters
486 //
487 # ifndef NDEBUG
488 ASSERT(A.numRows()==A.numCols());
489 ASSERT(A.firstRow()==1);
490 ASSERT(A.firstCol()==1);
491 # endif
492
493 //
494 // Call implementation
495 //
496 const auto ws = es_generic_wsq(computeSchurVectors, A);
497
498 # ifdef CHECK_CXXLAPACK
499 //
500 // Compare results
501 //
502 auto optWorkSize = es_native_wsq(computeSchurVectors, A);
503
504 if (! isIdentical(optWorkSize, ws.second, "optWorkSize", "ws.second")) {
505 ASSERT(0);
506 }
507 # endif
508
509 return ws;
510
511 }
512
513 template <typename SelectFunction, typename MA, typename IndexType,
514 typename VWR, typename VWI, typename MVS,
515 typename VWORK, typename BWORK>
516 IndexType
517 es(bool computeSchurVectors,
518 bool sortEigenvalues,
519 SelectFunction selectFunction,
520 GeMatrix<MA> &A,
521 IndexType &sDim,
522 DenseVector<VWR> &wr,
523 DenseVector<VWI> &wi,
524 GeMatrix<MVS> &VS,
525 DenseVector<VWORK> &work,
526 DenseVector<BWORK> &bWork)
527 {
528 LAPACK_DEBUG_OUT("es");
529
530 //
531 // Test the input parameters
532 //
533 # ifndef NDEBUG
534 ASSERT(A.numRows()==A.numCols());
535 ASSERT(A.firstRow()==1);
536 ASSERT(A.firstCol()==1);
537 ASSERT(work.firstIndex()==1);
538
539 typename GeMatrix<MA>::IndexType n = A.numRows();
540
541 ASSERT(wr.firstIndex()==1);
542 ASSERT(wr.length()==n);
543
544 ASSERT(wi.firstIndex()==1);
545 ASSERT(wi.length()==n);
546
547 if (computeSchurVectors) {
548 ASSERT(VS.numRows()==VS.numCols());
549 ASSERT(VS.numRows()==n);
550 ASSERT(VS.firstRow()==1);
551 ASSERT(VS.firstCol()==1);
552 }
553
554 if (sortEigenvalues) {
555 ASSERT(bWork.firstIndex()==1);
556 ASSERT(bWork.length()>=n);
557 }
558 # endif
559
560 //
561 // Make copies of output arguments
562 //
563 # ifdef CHECK_CXXLAPACK
564
565 typename GeMatrix<MA>::NoView A_org = A;
566 IndexType sDim_org = sDim;
567 typename DenseVector<VWR>::NoView wr_org = wr;
568 typename DenseVector<VWI>::NoView wi_org = wi;
569 typename GeMatrix<MVS>::NoView VS_org = VS;
570 typename DenseVector<VWORK>::NoView work_org = work;
571 typename DenseVector<BWORK>::NoView bWork_org = bWork;
572
573 # endif
574
575 //
576 // Call implementation
577 //
578 IndexType result = es_generic(computeSchurVectors,
579 sortEigenvalues,
580 selectFunction,
581 A,
582 sDim,
583 wr,
584 wi,
585 VS,
586 work,
587 bWork);
588
589 # ifdef CHECK_CXXLAPACK
590 //
591 // Make copies of results computed by the generic implementation
592 //
593 typename GeMatrix<MA>::NoView A_generic = A;
594 IndexType sDim_generic = sDim;
595 typename DenseVector<VWR>::NoView wr_generic = wr;
596 typename DenseVector<VWI>::NoView wi_generic = wi;
597 typename GeMatrix<MVS>::NoView VS_generic = VS;
598 typename DenseVector<VWORK>::NoView work_generic = work;
599 typename DenseVector<BWORK>::NoView bWork_generic = bWork;
600 //
601 // restore output arguments
602 //
603 A = A_org;
604 sDim = sDim_org;
605 wr = wr_org;
606 wi = wi_org;
607 VS = VS_org;
608 work = work_org;
609 bWork = bWork_org;
610
611 //
612 // Compare generic results with results from the native implementation
613 //
614 IndexType _result = es_native(computeSchurVectors,
615 sortEigenvalues,
616 selectFunction,
617 A,
618 sDim,
619 wr,
620 wi,
621 VS,
622 work,
623 bWork);
624
625 bool failed = false;
626 if (! isIdentical(A_generic, A, "A_generic", "A")) {
627 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
628 std::cerr << "F77LAPACK: A = " << A << std::endl;
629 failed = true;
630 }
631
632 if (! isIdentical(sDim_generic, sDim, "sDim_generic", "sDim")) {
633 std::cerr << "CXXLAPACK: sDim_generic = " << sDim_generic << std::endl;
634 std::cerr << "F77LAPACK: sDim = " << sDim << std::endl;
635 failed = true;
636 }
637
638 if (! isIdentical(wr_generic, wr, "wr_generic", "wr")) {
639 std::cerr << "CXXLAPACK: wr_generic = " << wr_generic << std::endl;
640 std::cerr << "F77LAPACK: wr = " << wr << std::endl;
641 failed = true;
642 }
643
644 if (! isIdentical(wi_generic, wi, "wi_generic", "wi")) {
645 std::cerr << "CXXLAPACK: wi_generic = " << wi_generic << std::endl;
646 std::cerr << "F77LAPACK: wi = " << wi << std::endl;
647 failed = true;
648 }
649
650 if (! isIdentical(VS_generic, VS, "VS_generic", "VS")) {
651 std::cerr << "CXXLAPACK: VS_generic = " << VS_generic << std::endl;
652 std::cerr << "F77LAPACK: VS = " << VS << std::endl;
653 failed = true;
654 }
655
656 if (! isIdentical(work_generic, work, "work_generic", "work")) {
657 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
658 std::cerr << "F77LAPACK: work = " << work << std::endl;
659 failed = true;
660 }
661
662 if (! isIdentical(bWork_generic, bWork, "bWork_generic", "bWork")) {
663 std::cerr << "CXXLAPACK: bWork_generic = "
664 << bWork_generic << std::endl;
665 std::cerr << "F77LAPACK: bWork = " << bWork << std::endl;
666 failed = true;
667 }
668
669 if (! isIdentical(result, _result, " result", "_result")) {
670 std::cerr << "CXXLAPACK: result = " << result << std::endl;
671 std::cerr << "F77LAPACK: _result = " << _result << std::endl;
672 failed = true;
673 }
674
675 if (failed) {
676 ASSERT(0);
677 } else {
678 // std::cerr << "passed: es.tcc" << std::endl;
679 }
680
681 # endif
682
683 return result;
684 }
685
686 //-- forwarding ----------------------------------------------------------------
687 template <typename SelectFunction, typename MA, typename IndexType,
688 typename VWR, typename VWI, typename MVS,
689 typename VWORK, typename BWORK>
690 IndexType
691 es(bool computeSchurVectors,
692 bool sortEigenvalues,
693 SelectFunction selectFunction,
694 MA &&A,
695 IndexType &&sDim,
696 VWR &&wr,
697 VWI &&wi,
698 MVS &&VS,
699 VWORK &&work,
700 BWORK &&bWork)
701 {
702 CHECKPOINT_ENTER;
703 const IndexType info = es(computeSchurVectors,
704 sortEigenvalues,
705 selectFunction,
706 A,
707 sDim,
708 wr,
709 wi,
710 VS,
711 work,
712 bWork);
713 CHECKPOINT_LEAVE;
714
715 return info;
716 }
717
718 } } // namespace lapack, flens
719
720 #endif // FLENS_LAPACK_EIG_ES_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DGEES( JOBVS, SORT, SELECT, N, A, LDA, SDIM, WR, WI,
36 $ VS, LDVS, WORK, LWORK, BWORK, INFO )
37 *
38 * -- LAPACK driver routine (version 3.2) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * November 2006
42 *
43 */
44
45 #ifndef FLENS_LAPACK_EIG_ES_TCC
46 #define FLENS_LAPACK_EIG_ES_TCC 1
47
48 #include <cmath>
49 #include <flens/blas/blas.h>
50 #include <flens/lapack/lapack.h>
51
52 namespace flens { namespace lapack {
53
54 //== generic lapack implementation =============================================
55
56 template <typename MA>
57 Pair<typename GeMatrix<MA>::IndexType>
58 es_generic_wsq(bool computeSchurVectors,
59 const GeMatrix<MA> &A)
60 {
61 using std::max;
62
63 typedef typename GeMatrix<MA>::ElementType T;
64 typedef typename GeMatrix<MA>::IndexType IndexType;
65
66 const IndexType n = A.numRows();
67
68 IndexType minWork, maxWork;
69
70 if (n==0) {
71 minWork = 1;
72 maxWork = 1;
73 } else {
74 maxWork = 2*n + n*ilaenv<T>(1, "GEHRD", "", n, 1, n, 0);
75 minWork = 3*n;
76
77 HSEQR::ComputeZ computeZ = (computeSchurVectors) ? HSEQR::NoInit
78 : HSEQR::No;
79 IndexType hsWork = hseqr_wsq(HSEQR::Schur, computeZ,
80 IndexType(1), n, A);
81
82 if (!computeSchurVectors) {
83 maxWork = max(maxWork, n + hsWork);
84 } else {
85 maxWork = max(maxWork,
86 2*n + (n-1)*ilaenv<T>(1, "ORGHR", "", n, 1, n));
87 maxWork = max(maxWork, n +hsWork);
88 }
89 }
90 return Pair<IndexType>(minWork, maxWork);
91 }
92
93 template <typename SelectFunction, typename MA, typename IndexType,
94 typename VWR, typename VWI, typename MVS,
95 typename VWORK, typename BWORK>
96 IndexType
97 es_generic(bool computeSchurVectors,
98 bool sortEigenvalues,
99 SelectFunction selectFunction,
100 GeMatrix<MA> &A,
101 IndexType &sDim,
102 DenseVector<VWR> &wr,
103 DenseVector<VWI> &wi,
104 GeMatrix<MVS> &VS,
105 DenseVector<VWORK> &work,
106 DenseVector<BWORK> &bWork)
107 {
108 using std::sqrt;
109
110 typedef typename GeMatrix<MA>::ElementType T;
111
112 const Underscore<IndexType> _;
113 const IndexType n = A.numRows();
114 const T Zero(0), One(1);
115
116 IndexType info = 0;
117 //
118 // Compute workspace
119 // (Note: Comments in the code beginning "Workspace:" describe the
120 // minimal amount of workspace needed at that point in the code,
121 // as well as the preferred amount for good performance.
122 // NB refers to the optimal block size for the immediately
123 // following subroutine, as returned by ILAENV.
124 // HSWORK refers to the workspace preferred by DHSEQR, as
125 // calculated below. HSWORK is computed assuming ILO=1 and IHI=N,
126 // the worst case.)
127 //
128 Pair<IndexType> wsQuery = es_wsq(computeSchurVectors, A);
129 IndexType minWork = wsQuery.first;
130 IndexType maxWork = wsQuery.second;
131
132 if (work.length()!=0 && work.length()<minWork) {
133 ASSERT(0);
134 } else if (work.length()==0) {
135 work.resize(maxWork);
136 }
137
138 work(1) = maxWork;
139
140 //
141 // Quick return if possible
142 //
143 if (n==0) {
144 sDim = 0;
145 return info;
146 }
147 //
148 // Get machine constants
149 //
150 const T eps = lamch<T>(Precision);
151 T smallNum = lamch<T>(SafeMin);
152 T bigNum = One / smallNum;
153 labad(smallNum, bigNum);
154
155 smallNum = sqrt(smallNum) / eps;
156 bigNum = One / smallNum;
157 //
158 // Scale A if max element outside range [SMLNUM,BIGNUM]
159 //
160 const T normA = lan(MaximumNorm, A);
161
162 bool scaleA = false;
163 T cScale;
164
165 if (normA>Zero && normA<smallNum) {
166 scaleA = true;
167 cScale = smallNum;
168 } else if (normA>bigNum) {
169 scaleA = true;
170 cScale = bigNum;
171 }
172 if (scaleA) {
173 lascl(LASCL::FullMatrix, 0, 0, normA, cScale, A);
174 }
175 //
176 // Permute the matrix to make it more nearly triangular
177 // (Workspace: need N)
178 //
179 IndexType iBal = 1;
180 auto balWork = work(_(iBal,iBal+n-1));
181
182 IndexType iLo, iHi;
183 bal(BALANCE::PermuteOnly, A, iLo, iHi, balWork);
184 //
185 // Reduce to upper Hessenberg form
186 // (Workspace: need 3*N, prefer 2*N+N*NB)
187 //
188 IndexType iTau = iBal + n;
189 IndexType iWork = iTau + n;
190 IndexType lWork = work.length();
191
192 auto tau = work(_(iTau, iTau+n-2));
193 auto hrdWork = work(_(iWork, lWork));
194
195 hrd(iLo, iHi, A, tau, hrdWork);
196
197 if (computeSchurVectors) {
198 //
199 // Copy Householder vectors to VS
200 //
201 VS.lower() = A.lower();
202 //
203 // Generate orthogonal matrix in VS
204 // (Workspace: need 3*N-1, prefer 2*N+(N-1)*NB)
205 //
206 orghr(iLo, iHi, VS, tau, hrdWork);
207 }
208
209 sDim = 0;
210 //
211 // Perform QR iteration, accumulating Schur vectors in VS if desired
212 // (Workspace: need N+1, prefer N+HSWORK (see comments) )
213 //
214 iWork = iTau;
215 auto hseqrWork = work(_(iWork, lWork));
216 HSEQR::ComputeZ computeZ = (computeSchurVectors) ? HSEQR::NoInit
217 : HSEQR::No;
218 IndexType iEval = hseqr(HSEQR::Schur, computeZ, iLo, iHi,
219 A, wr, wi, VS, hseqrWork);
220 if (iEval>0) {
221 info = iEval;
222 }
223 //
224 // Sort eigenvalues if desired
225 //
226 if (sortEigenvalues && info==0) {
227 if (scaleA) {
228 lascl(LASCL::FullMatrix, 0, 0, cScale, normA, wr);
229 lascl(LASCL::FullMatrix, 0, 0, cScale, normA, wi);
230 }
231 for (IndexType i=1; i<=n; ++i) {
232 bWork(i) = selectFunction(wr(i), wi(i));
233 }
234 //
235 // Reorder eigenvalues and transform Schur vectors
236 // (Workspace: none needed)
237 //
238 T sep, s;
239 // TODO: I dislike that a dummy vector is needed
240 DenseVector<Array<int> > dummy(1);
241 IndexType iCond = trsen(TRSEN::None, computeSchurVectors, bWork,
242 A, VS, wr, wi, sDim, s, sep,
243 hseqrWork, dummy);
244 if (iCond>0) {
245 info = n + iCond;
246 }
247 }
248
249 if (computeSchurVectors) {
250 //
251 // Undo balancing
252 // (Workspace: need N)
253 //
254 bak(BALANCE::PermuteOnly, Right, iLo, iHi, balWork, VS);
255 }
256
257 if (scaleA) {
258 //
259 // Undo scaling for the Schur form of A
260 //
261 lascl(LASCL::UpperHessenberg, 0, 0, cScale, normA, A);
262 wr = A.diag(0);
263 if (cScale==smallNum) {
264 //
265 // If scaling back towards underflow, adjust WI if an
266 // offdiagonal element of a 2-by-2 block in the Schur form
267 // underflows.
268 //
269 IndexType i1, i2;
270 if (iEval>0) {
271 i1 = iEval + 1;
272 i2 = iHi - 1;
273 lascl(LASCL::FullMatrix, 0, 0, cScale, normA, wi(_(1,iLo-1)));
274 } else if (sortEigenvalues) {
275 i1 = 1;
276 i2 = n - 1;
277 } else {
278 i1 = iLo;
279 i2 = iHi - 1;
280 }
281 IndexType iNext = i1-1;
282 for (IndexType i=i1; i<=i2; ++i) {
283 if (i<iNext) {
284 continue;
285 }
286 if (wi(i)==Zero) {
287 iNext = i+1;
288 } else {
289 if (A(i+1,i)==Zero) {
290 wi(i) = Zero;
291 wi(i+1) = Zero;
292 } else if (A(i+1,i)!=Zero && A(i,i+1)==Zero) {
293 wi(i) = Zero;
294 wi(i+1) = Zero;
295 if (i>1) {
296 blas::swap(A(_(1,i-1),i), A(_(1,i-1),i+1));
297 }
298 if (n>i+1) {
299 blas::swap(A(i,_(i+2,n)), A(i+1,_(i+2,n)));
300 }
301 if (computeSchurVectors) {
302 blas::swap(VS(_,i), VS(_,i+1));
303 }
304 A(i,i+1) = A(i+1,i);
305 A(i+1,i) = Zero;
306 }
307 iNext = i + 2;
308 }
309 }
310 }
311 //
312 // Undo scaling for the imaginary part of the eigenvalues
313 //
314 lascl(LASCL::FullMatrix, 0, 0, cScale, normA, wi(_(iEval+1,n)));
315 }
316
317 if (sortEigenvalues && info==0) {
318 //
319 // Check if reordering successful
320 //
321 bool lastSelect = true;
322 bool last2Select = true;
323 sDim = 0;
324 IndexType ip = 0;
325 for (IndexType i=1; i<=n; ++i) {
326 bool currentSelect = selectFunction(wr(i), wi(i));
327 if (wi(i)==Zero) {
328 if (currentSelect) {
329 ++sDim;
330 }
331 ip = 0;
332 if (currentSelect && !lastSelect) {
333 info = n + 2;
334 }
335 } else {
336 if (ip==1) {
337 //
338 // Last eigenvalue of conjugate pair
339 //
340 currentSelect = currentSelect || lastSelect;
341 lastSelect = currentSelect;
342 if (currentSelect) {
343 sDim += 2;
344 }
345 ip = -1;
346 if (currentSelect && !last2Select) {
347 info = n + 2;
348 }
349 } else {
350 //
351 // First eigenvalue of conjugate pair
352 //
353 ip = 1;
354 }
355 }
356 last2Select = lastSelect;
357 lastSelect = currentSelect;
358 }
359 }
360
361 work(1) = maxWork;
362 return info;
363 }
364
365 //== interface for native lapack ===============================================
366
367 #ifdef CHECK_CXXLAPACK
368
369 template <typename MA>
370 typename GeMatrix<MA>::IndexType
371 es_native_wsq(bool computeSchurVectors,
372 const GeMatrix<MA> &A)
373 {
374 typedef typename GeMatrix<MA>::ElementType T;
375
376 const char JOBVS = (computeSchurVectors) ? 'V' : 'N';
377 const char SORT = 'N';
378 LOGICAL (*SELECT)(const T*, const T*) = 0;
379 const INTEGER N = A.numRows();
380 const INTEGER LDA = A.leadingDimension();
381 INTEGER SDIM = 0;
382 const INTEGER LDVS = LDA;
383 T DUMMY = 0;
384 T WORK = 0;
385 const INTEGER LWORK = -1;
386 LOGICAL BWORK;
387 INTEGER INFO;
388
389 if (IsSame<T,DOUBLE>::value) {
390 LAPACK_IMPL(dgees)(&JOBVS,
391 &SORT,
392 SELECT,
393 &N,
394 &DUMMY,
395 &LDA,
396 &SDIM,
397 &DUMMY,
398 &DUMMY,
399 &DUMMY,
400 &LDVS,
401 &WORK,
402 &LWORK,
403 &BWORK,
404 &INFO);
405 } else {
406 ASSERT(0);
407 }
408 ASSERT(INFO>=0);
409 return WORK;
410 }
411
412 template <typename SelectFunction, typename MA, typename IndexType,
413 typename VWR, typename VWI, typename MVS,
414 typename VWORK, typename BWORK>
415 IndexType
416 es_native(bool computeSchurVectors,
417 bool sortEigenvalues,
418 SelectFunction selectFunction,
419 GeMatrix<MA> &A,
420 IndexType &sDim,
421 DenseVector<VWR> &wr,
422 DenseVector<VWI> &wi,
423 GeMatrix<MVS> &VS,
424 DenseVector<VWORK> &work,
425 DenseVector<BWORK> &bWork)
426 {
427 typedef typename GeMatrix<MA>::ElementType T;
428
429 const char JOBVS = (computeSchurVectors) ? 'V' : 'N';
430 const char SORT = (sortEigenvalues) ? 'S' : 'N';
431 LOGICAL (*SELECT)(const T*, const T*) = selectFunction.select;
432 const INTEGER N = A.numRows();
433 const INTEGER LDA = A.leadingDimension();
434 INTEGER SDIM = sDim;
435 const INTEGER LDVS = VS.leadingDimension();
436 const INTEGER LWORK = work.length();
437 INTEGER INFO;
438
439 DenseVector<Array<LOGICAL> > _bWork(N);
440 for (IndexType i=1; i<=N; ++i) {
441 _bWork(i) = bWork(i);
442 }
443
444 if (IsSame<T,DOUBLE>::value) {
445 LAPACK_IMPL(dgees)(&JOBVS,
446 &SORT,
447 SELECT,
448 &N,
449 A.data(),
450 &LDA,
451 &SDIM,
452 wr.data(),
453 wi.data(),
454 VS.data(),
455 &LDVS,
456 work.data(),
457 &LWORK,
458 _bWork.data(),
459 &INFO);
460 } else {
461 ASSERT(0);
462 }
463 ASSERT(INFO>=0);
464
465 sDim = SDIM;
466 for (IndexType i=1; i<=N; ++i) {
467 bWork(i) = _bWork(i);
468 }
469
470 return INFO;
471 }
472
473 #endif // CHECK_CXXLAPACK
474
475 //== public interface ==========================================================
476
477 template <typename MA>
478 Pair<typename GeMatrix<MA>::IndexType>
479 es_wsq(bool computeSchurVectors,
480 const GeMatrix<MA> &A)
481 {
482 LAPACK_DEBUG_OUT("es_wsq");
483
484 //
485 // Test the input parameters
486 //
487 # ifndef NDEBUG
488 ASSERT(A.numRows()==A.numCols());
489 ASSERT(A.firstRow()==1);
490 ASSERT(A.firstCol()==1);
491 # endif
492
493 //
494 // Call implementation
495 //
496 const auto ws = es_generic_wsq(computeSchurVectors, A);
497
498 # ifdef CHECK_CXXLAPACK
499 //
500 // Compare results
501 //
502 auto optWorkSize = es_native_wsq(computeSchurVectors, A);
503
504 if (! isIdentical(optWorkSize, ws.second, "optWorkSize", "ws.second")) {
505 ASSERT(0);
506 }
507 # endif
508
509 return ws;
510
511 }
512
513 template <typename SelectFunction, typename MA, typename IndexType,
514 typename VWR, typename VWI, typename MVS,
515 typename VWORK, typename BWORK>
516 IndexType
517 es(bool computeSchurVectors,
518 bool sortEigenvalues,
519 SelectFunction selectFunction,
520 GeMatrix<MA> &A,
521 IndexType &sDim,
522 DenseVector<VWR> &wr,
523 DenseVector<VWI> &wi,
524 GeMatrix<MVS> &VS,
525 DenseVector<VWORK> &work,
526 DenseVector<BWORK> &bWork)
527 {
528 LAPACK_DEBUG_OUT("es");
529
530 //
531 // Test the input parameters
532 //
533 # ifndef NDEBUG
534 ASSERT(A.numRows()==A.numCols());
535 ASSERT(A.firstRow()==1);
536 ASSERT(A.firstCol()==1);
537 ASSERT(work.firstIndex()==1);
538
539 typename GeMatrix<MA>::IndexType n = A.numRows();
540
541 ASSERT(wr.firstIndex()==1);
542 ASSERT(wr.length()==n);
543
544 ASSERT(wi.firstIndex()==1);
545 ASSERT(wi.length()==n);
546
547 if (computeSchurVectors) {
548 ASSERT(VS.numRows()==VS.numCols());
549 ASSERT(VS.numRows()==n);
550 ASSERT(VS.firstRow()==1);
551 ASSERT(VS.firstCol()==1);
552 }
553
554 if (sortEigenvalues) {
555 ASSERT(bWork.firstIndex()==1);
556 ASSERT(bWork.length()>=n);
557 }
558 # endif
559
560 //
561 // Make copies of output arguments
562 //
563 # ifdef CHECK_CXXLAPACK
564
565 typename GeMatrix<MA>::NoView A_org = A;
566 IndexType sDim_org = sDim;
567 typename DenseVector<VWR>::NoView wr_org = wr;
568 typename DenseVector<VWI>::NoView wi_org = wi;
569 typename GeMatrix<MVS>::NoView VS_org = VS;
570 typename DenseVector<VWORK>::NoView work_org = work;
571 typename DenseVector<BWORK>::NoView bWork_org = bWork;
572
573 # endif
574
575 //
576 // Call implementation
577 //
578 IndexType result = es_generic(computeSchurVectors,
579 sortEigenvalues,
580 selectFunction,
581 A,
582 sDim,
583 wr,
584 wi,
585 VS,
586 work,
587 bWork);
588
589 # ifdef CHECK_CXXLAPACK
590 //
591 // Make copies of results computed by the generic implementation
592 //
593 typename GeMatrix<MA>::NoView A_generic = A;
594 IndexType sDim_generic = sDim;
595 typename DenseVector<VWR>::NoView wr_generic = wr;
596 typename DenseVector<VWI>::NoView wi_generic = wi;
597 typename GeMatrix<MVS>::NoView VS_generic = VS;
598 typename DenseVector<VWORK>::NoView work_generic = work;
599 typename DenseVector<BWORK>::NoView bWork_generic = bWork;
600 //
601 // restore output arguments
602 //
603 A = A_org;
604 sDim = sDim_org;
605 wr = wr_org;
606 wi = wi_org;
607 VS = VS_org;
608 work = work_org;
609 bWork = bWork_org;
610
611 //
612 // Compare generic results with results from the native implementation
613 //
614 IndexType _result = es_native(computeSchurVectors,
615 sortEigenvalues,
616 selectFunction,
617 A,
618 sDim,
619 wr,
620 wi,
621 VS,
622 work,
623 bWork);
624
625 bool failed = false;
626 if (! isIdentical(A_generic, A, "A_generic", "A")) {
627 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
628 std::cerr << "F77LAPACK: A = " << A << std::endl;
629 failed = true;
630 }
631
632 if (! isIdentical(sDim_generic, sDim, "sDim_generic", "sDim")) {
633 std::cerr << "CXXLAPACK: sDim_generic = " << sDim_generic << std::endl;
634 std::cerr << "F77LAPACK: sDim = " << sDim << std::endl;
635 failed = true;
636 }
637
638 if (! isIdentical(wr_generic, wr, "wr_generic", "wr")) {
639 std::cerr << "CXXLAPACK: wr_generic = " << wr_generic << std::endl;
640 std::cerr << "F77LAPACK: wr = " << wr << std::endl;
641 failed = true;
642 }
643
644 if (! isIdentical(wi_generic, wi, "wi_generic", "wi")) {
645 std::cerr << "CXXLAPACK: wi_generic = " << wi_generic << std::endl;
646 std::cerr << "F77LAPACK: wi = " << wi << std::endl;
647 failed = true;
648 }
649
650 if (! isIdentical(VS_generic, VS, "VS_generic", "VS")) {
651 std::cerr << "CXXLAPACK: VS_generic = " << VS_generic << std::endl;
652 std::cerr << "F77LAPACK: VS = " << VS << std::endl;
653 failed = true;
654 }
655
656 if (! isIdentical(work_generic, work, "work_generic", "work")) {
657 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
658 std::cerr << "F77LAPACK: work = " << work << std::endl;
659 failed = true;
660 }
661
662 if (! isIdentical(bWork_generic, bWork, "bWork_generic", "bWork")) {
663 std::cerr << "CXXLAPACK: bWork_generic = "
664 << bWork_generic << std::endl;
665 std::cerr << "F77LAPACK: bWork = " << bWork << std::endl;
666 failed = true;
667 }
668
669 if (! isIdentical(result, _result, " result", "_result")) {
670 std::cerr << "CXXLAPACK: result = " << result << std::endl;
671 std::cerr << "F77LAPACK: _result = " << _result << std::endl;
672 failed = true;
673 }
674
675 if (failed) {
676 ASSERT(0);
677 } else {
678 // std::cerr << "passed: es.tcc" << std::endl;
679 }
680
681 # endif
682
683 return result;
684 }
685
686 //-- forwarding ----------------------------------------------------------------
687 template <typename SelectFunction, typename MA, typename IndexType,
688 typename VWR, typename VWI, typename MVS,
689 typename VWORK, typename BWORK>
690 IndexType
691 es(bool computeSchurVectors,
692 bool sortEigenvalues,
693 SelectFunction selectFunction,
694 MA &&A,
695 IndexType &&sDim,
696 VWR &&wr,
697 VWI &&wi,
698 MVS &&VS,
699 VWORK &&work,
700 BWORK &&bWork)
701 {
702 CHECKPOINT_ENTER;
703 const IndexType info = es(computeSchurVectors,
704 sortEigenvalues,
705 selectFunction,
706 A,
707 sDim,
708 wr,
709 wi,
710 VS,
711 work,
712 bWork);
713 CHECKPOINT_LEAVE;
714
715 return info;
716 }
717
718 } } // namespace lapack, flens
719
720 #endif // FLENS_LAPACK_EIG_ES_TCC