1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z,
36 $ LDZ, WORK, LWORK, INFO )
37 *
38 * -- LAPACK computational routine (version 3.2.2) --
39 * Univ. of Tennessee, Univ. of California Berkeley,
40 * Univ. of Colorado Denver and NAG Ltd..
41 * June 2010
42 */
43
44 #ifndef FLENS_LAPACK_EIG_HSEQR_TCC
45 #define FLENS_LAPACK_EIG_HSEQR_TCC 1
46
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 namespace flens { namespace lapack {
51
52 //== generic lapack implementation =============================================
53
54 template <typename T>
55 void
56 _makeHseqrOpt(char opt[3], HSEQR::Job job, HSEQR::ComputeZ computeZ)
57 {
58 opt[0] = (job==HSEQR::Eigenvalues) ? 'E' : 'S';
59 if (computeZ==HSEQR::No) {
60 opt[1] = 'N';
61 } else if (computeZ==HSEQR::Init) {
62 opt[1] = 'I';
63 } else {
64 opt[1] = 'V';
65 }
66 opt[2] = 0;
67 }
68
69 template <typename IndexType, typename MH>
70 IndexType
71 hseqr_generic_wsq(HSEQR::Job job,
72 HSEQR::ComputeZ computeZ,
73 IndexType iLo,
74 IndexType iHi,
75 const GeMatrix<MH> &H)
76 {
77 using std::max;
78
79 IndexType n = H.numRows();
80 const bool wantT = (job==HSEQR::Schur);
81 const bool wantZ = (computeZ!=HSEQR::No);
82
83 IndexType info = laqr0_wsq(wantT, wantZ, iLo, iHi, H);
84 info = max(max(IndexType(1), n), info);
85 return info;
86 }
87
88 template <typename IndexType, typename MH, typename VWR, typename VWI,
89 typename MZ, typename VWORK>
90 IndexType
91 hseqr_generic(HSEQR::Job job,
92 HSEQR::ComputeZ computeZ,
93 IndexType iLo,
94 IndexType iHi,
95 GeMatrix<MH> &H,
96 DenseVector<VWR> &wr,
97 DenseVector<VWI> &wi,
98 GeMatrix<MZ> &Z,
99 DenseVector<VWORK> &work)
100 {
101 using std::max;
102 using std::min;
103
104 typedef typename GeMatrix<MH>::ElementType T;
105 typedef typename GeMatrix<MH>::View GeMatrixView;
106 typedef typename GeMatrix<MH>::VectorView DenseVectorView;
107
108 const IndexType nTiny = 11;
109 // ==== NL allocates some local workspace to help small matrices
110 // . through a rare DLAHQR failure. NL .GT. NTINY = 11 is
111 // . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom-
112 // . mended. (The default value of NMIN is 75.) Using NL = 49
113 // . allows up to six simultaneous shifts and a 16-by-16
114 // . deflation window. ====
115 const IndexType nl = 49;
116 T hlBuffer[nl*nl], worklBuffer[nl];
117 GeMatrixView Hl = typename GeMatrixView::Engine(nl, nl, hlBuffer, nl);
118 DenseVectorView workl = typename DenseVectorView::Engine(nl, worklBuffer);
119
120 const Underscore<IndexType> _;
121 const IndexType n = H.numCols();
122 const T Zero(0), One(1);
123
124 IndexType info = 0;
125
126 //
127 // ==== Decode and check the input parameters. ====
128 //
129 const bool wantT = (job==HSEQR::Schur);
130 const bool initZ = (computeZ==HSEQR::Init);
131 const bool wantZ = (computeZ!=HSEQR::No);
132
133 if (work.length()>0) {
134 work(1) = n;
135 } else {
136 //
137 // ==== Perform and apply a workspace query ====
138 //
139 IndexType lWorkOpt = laqr0_wsq(wantT, wantZ, iLo, iHi, H);
140 work.resize(lWorkOpt);
141 }
142
143 if (n==0) {
144 //
145 // ==== Quick return in case n = 0; nothing to do. ====
146 //
147 return info;
148 }
149 //
150 // ==== copy eigenvalues isolated by DGEBAL ====
151 //
152 for (IndexType i=1; i<=iLo-1; ++i) {
153 wr(i) = H(i,i);
154 wi(i) = Zero;
155 }
156
157 for (IndexType i=iHi+1; i<=n; ++i) {
158 wr(i) = H(i,i);
159 wi(i) = Zero;
160 }
161 //
162 // ==== Initialize Z, if requested ====
163 //
164 if (initZ) {
165 Z = Zero;
166 Z.diag(0) = One;
167 }
168 //
169 // ==== Quick return if possible ====
170 //
171 if (iLo==iHi) {
172 wr(iLo) = H(iLo, iLo);
173 wi(iLo) = Zero;
174 return info;
175 }
176 //
177 // ==== lahqr/laqr0 crossover point ====
178 //
179 char opt[3];
180 _makeHseqrOpt<T>(opt, job, computeZ);
181 IndexType nMin = ilaenv<T>(12, "HSEQR", opt, n, iLo, iHi, work.length());
182
183 nMin= max(nTiny, nMin);
184 //
185 // ==== laqr0 for big matrices; lahqr for small ones ====
186 //
187 if (n>nMin) {
188 info = laqr0(wantT, wantZ, iLo, iHi, H, wr, wi, iLo, iHi, Z, work);
189 } else {
190 //
191 // ==== Small matrix ====
192 //
193 info = lahqr(wantT, wantZ, iLo, iHi, H, wr, wi, iLo, iHi, Z);
194
195 if (info>0) {
196 //
197 // ==== A rare lahqr failure! laqr0 sometimes succeeds
198 // . when lahqr fails. ====
199 //
200 const IndexType kBot = info;
201 if (n>=nl) {
202 //
203 // ==== Larger matrices have enough subdiagonal scratch
204 // . space to call laqr0 directly. ====
205 //
206 info = laqr0(wantT, wantZ,
207 iLo, kBot, H, wr, wi,
208 iLo, iHi, Z, work);
209 } else {
210 //
211 // ==== Tiny matrices don't have enough subdiagonal
212 // . scratch space to benefit from DLAQR0. Hence,
213 // . tiny matrices must be copied into a larger
214 // . array before calling DLAQR0. ====
215 //
216 Hl(_(1,n),_(1,n)) = H;
217 Hl(n+1, n) = Zero;
218 Hl(_(1,nl),_(n+1,nl)) = Zero;
219 info = laqr0(wantT, wantZ,
220 iLo, kBot, Hl, wr, wi,
221 iLo, iHi, Z, workl);
222 if (wantT || (info!=0)) {
223 H = Hl(_(1,n),_(1,n));
224 }
225 }
226 }
227 }
228 //
229 // ==== Clear out the trash, if necessary. ====
230 //
231 if ((wantT || (info!=0)) && (n>2)) {
232 H(_(3,n),_(1,n-2)).lower() = Zero;
233 }
234 //
235 // ==== Ensure reported workspace size is backward-compatible with
236 // . previous LAPACK versions. ====
237 //
238 work(1) = max(T(max(IndexType(1),n)), work(1));
239 return info;
240 }
241
242 //== interface for native lapack ===============================================
243
244 #ifdef CHECK_CXXLAPACK
245
246 template <typename IndexType, typename MH>
247 IndexType
248 hseqr_native_wsq(HSEQR::Job job,
249 HSEQR::ComputeZ computeZ,
250 IndexType iLo,
251 IndexType iHi,
252 const GeMatrix<MH> &H)
253 {
254 typedef typename GeMatrix<MH>::ElementType T;
255
256 const char JOB = getF77LapackChar<HSEQR::Job>(job);
257 const char COMPZ = getF77LapackChar<HSEQR::ComputeZ>(computeZ);
258 const INTEGER N = H.numCols();
259 const INTEGER ILO = iLo;
260 const INTEGER IHI = iHi;
261 const INTEGER LDH = H.leadingDimension();
262 const INTEGER LDZ = H.leadingDimension();
263 T WORK = 0;
264 T DUMMY = 0;
265 const INTEGER LWORK = -1;
266 INTEGER INFO;
267
268 if (IsSame<T,DOUBLE>::value) {
269 LAPACK_IMPL(dhseqr)(&JOB,
270 &COMPZ,
271 &N,
272 &ILO,
273 &IHI,
274 &DUMMY,
275 &LDH,
276 &DUMMY,
277 &DUMMY,
278 &DUMMY,
279 &LDZ,
280 &WORK,
281 &LWORK,
282 &INFO);
283 } else {
284 ASSERT(0);
285 }
286
287 # ifndef NDEBUG
288 if (INFO!=0) {
289 std::cerr << "INFO = " << INFO << std::endl;
290 ASSERT(INFO==0);
291 }
292 # endif
293
294 return WORK;
295 }
296
297 template <typename IndexType, typename MH, typename VWR, typename VWI,
298 typename MZ, typename VWORK>
299 IndexType
300 hseqr_native(HSEQR::Job job,
301 HSEQR::ComputeZ computeZ,
302 IndexType iLo,
303 IndexType iHi,
304 GeMatrix<MH> &H,
305 DenseVector<VWR> &wr,
306 DenseVector<VWI> &wi,
307 GeMatrix<MZ> &Z,
308 DenseVector<VWORK> &work)
309 {
310 typedef typename GeMatrix<MH>::ElementType T;
311
312 const char JOB = getF77LapackChar<HSEQR::Job>(job);
313 const char COMPZ = getF77LapackChar<HSEQR::ComputeZ>(computeZ);
314 const INTEGER N = H.numCols();
315 const INTEGER ILO = iLo;
316 const INTEGER IHI = iHi;
317 const INTEGER LDH = H.leadingDimension();
318 const INTEGER LDZ = Z.leadingDimension();
319 const INTEGER LWORK = work.length();
320 INTEGER INFO;
321
322 if (IsSame<T,DOUBLE>::value) {
323 LAPACK_IMPL(dhseqr)(&JOB,
324 &COMPZ,
325 &N,
326 &ILO,
327 &IHI,
328 H.data(),
329 &LDH,
330 wr.data(),
331 wi.data(),
332 Z.data(),
333 &LDZ,
334 work.data(),
335 &LWORK,
336 &INFO);
337 } else {
338 ASSERT(0);
339 }
340 ASSERT(INFO>=0);
341
342 return INFO;
343 }
344
345 #endif // CHECK_CXXLAPACK
346
347 //== public interface ==========================================================
348
349 template <typename IndexType, typename MH>
350 IndexType
351 hseqr_wsq(HSEQR::Job job,
352 HSEQR::ComputeZ computeZ,
353 IndexType iLo,
354 IndexType iHi,
355 const GeMatrix<MH> &H)
356 {
357 LAPACK_DEBUG_OUT("hseqr_wsq");
358
359 //
360 // Test the input parameters
361 //
362 ASSERT(H.firstRow()==1);
363 ASSERT(H.firstCol()==1);
364 ASSERT(H.numRows()==H.numCols());
365
366 //
367 // Call implementation
368 //
369 IndexType info = hseqr_generic_wsq(job, computeZ, iLo, iHi, H);
370
371 # ifdef CHECK_CXXLAPACK
372 //
373 // Compare results
374 //
375 IndexType _info = hseqr_native_wsq(job, computeZ, iLo, iHi, H);
376
377 if (info!=_info) {
378 std::cerr << "CXXLAPACK: info = " << info << std::endl;
379 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
380 ASSERT(0);
381 }
382 # endif
383 return info;
384 }
385
386 template <typename IndexType, typename MH, typename VWR, typename VWI,
387 typename MZ, typename VWORK>
388 IndexType
389 hseqr(HSEQR::Job job,
390 HSEQR::ComputeZ computeZ,
391 IndexType iLo,
392 IndexType iHi,
393 GeMatrix<MH> &H,
394 DenseVector<VWR> &wr,
395 DenseVector<VWI> &wi,
396 GeMatrix<MZ> &Z,
397 DenseVector<VWORK> &work)
398 {
399 LAPACK_DEBUG_OUT("hseqr");
400
401 //
402 // Test the input parameters
403 //
404 ASSERT(H.firstRow()==1);
405 ASSERT(H.firstCol()==1);
406 ASSERT(H.numRows()==H.numCols());
407 ASSERT(wr.firstIndex()==1);
408 ASSERT(wr.length()==H.numCols());
409 ASSERT(wi.firstIndex()==1);
410 ASSERT(wi.length()==H.numCols());
411 ASSERT((computeZ==HSEQR::No) || (Z.numRows()==H.numCols()));
412 ASSERT((computeZ==HSEQR::No) || (Z.numCols()==H.numCols()));
413 ASSERT((work.length()==0) || (work.length()>=H.numCols()));
414
415 # ifdef CHECK_CXXLAPACK
416 //
417 // Make copies of output arguments
418 //
419 typename GeMatrix<MH>::NoView H_org = H;
420
421 typename GeMatrix<MH>::NoView _H = H;
422 typename DenseVector<VWR>::NoView _wr = wr;
423 typename DenseVector<VWI>::NoView _wi = wi;
424 typename GeMatrix<MZ>::NoView _Z = Z;
425 typename DenseVector<VWORK>::NoView _work = work;
426 # endif
427
428 //
429 // Call implementation
430 //
431 IndexType info = hseqr_generic(job, computeZ, iLo, iHi, H,
432 wr, wi, Z, work);
433
434 # ifdef CHECK_CXXLAPACK
435 //
436 // Compare results
437 //
438 // TODO: also check workspace query directly!
439 if (_work.length()==0) {
440 _work.resize(work.length());
441 }
442
443 IndexType _info = hseqr_native(job, computeZ, iLo, iHi, _H,
444 _wr, _wi, _Z, _work);
445
446 bool failed = false;
447 if (! isIdentical(H, _H, " H", "_H")) {
448 std::cerr << "CXXLAPACK: H = " << H << std::endl;
449 std::cerr << "F77LAPACK: _H = " << _H << std::endl;
450 failed = true;
451 }
452
453 if (! isIdentical(wr, _wr, " wr", "_wr")) {
454 std::cerr << "CXXLAPACK: wr = " << wr << std::endl;
455 std::cerr << "F77LAPACK: _wr = " << _wr << std::endl;
456 failed = true;
457 }
458
459 if (! isIdentical(wi, _wi, " wi", "_wi")) {
460 std::cerr << "CXXLAPACK: wi = " << wi << std::endl;
461 std::cerr << "F77LAPACK: _wi = " << _wi << std::endl;
462 failed = true;
463 }
464
465 if (! isIdentical(Z, _Z, " Z", "_Z")) {
466 std::cerr << "CXXLAPACK: Z = " << Z << std::endl;
467 std::cerr << "F77LAPACK: _Z = " << _Z << std::endl;
468 failed = true;
469 }
470
471 if (! isIdentical(info, _info, " info", "_info")) {
472 std::cerr << "CXXLAPACK: info = " << info << std::endl;
473 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
474 failed = true;
475 }
476
477 if (! isIdentical(work, _work, " work", "_work")) {
478 std::cerr << "CXXLAPACK: work = " << work << std::endl;
479 std::cerr << "F77LAPACK: _work = " << _work << std::endl;
480 failed = true;
481 }
482
483 if (failed) {
484 std::cerr << "H_org = " << H_org << std::endl;
485
486 std::cerr << "job = " << job
487 << ", computeZ = " << computeZ
488 << ", iLo = " << iLo
489 << ", iHi = " << iHi
490 << ", H.numRows() = " << H.numRows()
491 << std::endl;
492 ASSERT(0);
493 }
494 # endif
495 return info;
496 }
497
498 //-- forwarding ----------------------------------------------------------------
499 template <typename IndexType, typename MH>
500 IndexType
501 hseqr_wsq(HSEQR::Job job,
502 HSEQR::ComputeZ computeZ,
503 IndexType iLo,
504 IndexType iHi,
505 const MH &&H)
506 {
507 CHECKPOINT_ENTER;
508 const IndexType result = hseqr_wsq(job, computeZ, iLo, iHi, H);
509 CHECKPOINT_LEAVE;
510
511 return result;
512 }
513
514 template <typename IndexType, typename MH, typename VWR, typename VWI,
515 typename MZ, typename VWORK>
516 IndexType
517 hseqr(HSEQR::Job job,
518 HSEQR::ComputeZ computeZ,
519 IndexType iLo,
520 IndexType iHi,
521 MH &&H,
522 VWR &&wr,
523 VWI &&wi,
524 MZ &&Z,
525 VWORK &&work)
526 {
527 CHECKPOINT_ENTER;
528 const IndexType result = hseqr(job, computeZ, iLo, iHi, H, wr, wi, Z, work);
529 CHECKPOINT_LEAVE;
530
531 return result;
532 }
533
534 } } // namespace lapack, flens
535
536 #endif // FLENS_LAPACK_EIG_HSEQR_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z,
36 $ LDZ, WORK, LWORK, INFO )
37 *
38 * -- LAPACK computational routine (version 3.2.2) --
39 * Univ. of Tennessee, Univ. of California Berkeley,
40 * Univ. of Colorado Denver and NAG Ltd..
41 * June 2010
42 */
43
44 #ifndef FLENS_LAPACK_EIG_HSEQR_TCC
45 #define FLENS_LAPACK_EIG_HSEQR_TCC 1
46
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 namespace flens { namespace lapack {
51
52 //== generic lapack implementation =============================================
53
54 template <typename T>
55 void
56 _makeHseqrOpt(char opt[3], HSEQR::Job job, HSEQR::ComputeZ computeZ)
57 {
58 opt[0] = (job==HSEQR::Eigenvalues) ? 'E' : 'S';
59 if (computeZ==HSEQR::No) {
60 opt[1] = 'N';
61 } else if (computeZ==HSEQR::Init) {
62 opt[1] = 'I';
63 } else {
64 opt[1] = 'V';
65 }
66 opt[2] = 0;
67 }
68
69 template <typename IndexType, typename MH>
70 IndexType
71 hseqr_generic_wsq(HSEQR::Job job,
72 HSEQR::ComputeZ computeZ,
73 IndexType iLo,
74 IndexType iHi,
75 const GeMatrix<MH> &H)
76 {
77 using std::max;
78
79 IndexType n = H.numRows();
80 const bool wantT = (job==HSEQR::Schur);
81 const bool wantZ = (computeZ!=HSEQR::No);
82
83 IndexType info = laqr0_wsq(wantT, wantZ, iLo, iHi, H);
84 info = max(max(IndexType(1), n), info);
85 return info;
86 }
87
88 template <typename IndexType, typename MH, typename VWR, typename VWI,
89 typename MZ, typename VWORK>
90 IndexType
91 hseqr_generic(HSEQR::Job job,
92 HSEQR::ComputeZ computeZ,
93 IndexType iLo,
94 IndexType iHi,
95 GeMatrix<MH> &H,
96 DenseVector<VWR> &wr,
97 DenseVector<VWI> &wi,
98 GeMatrix<MZ> &Z,
99 DenseVector<VWORK> &work)
100 {
101 using std::max;
102 using std::min;
103
104 typedef typename GeMatrix<MH>::ElementType T;
105 typedef typename GeMatrix<MH>::View GeMatrixView;
106 typedef typename GeMatrix<MH>::VectorView DenseVectorView;
107
108 const IndexType nTiny = 11;
109 // ==== NL allocates some local workspace to help small matrices
110 // . through a rare DLAHQR failure. NL .GT. NTINY = 11 is
111 // . required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom-
112 // . mended. (The default value of NMIN is 75.) Using NL = 49
113 // . allows up to six simultaneous shifts and a 16-by-16
114 // . deflation window. ====
115 const IndexType nl = 49;
116 T hlBuffer[nl*nl], worklBuffer[nl];
117 GeMatrixView Hl = typename GeMatrixView::Engine(nl, nl, hlBuffer, nl);
118 DenseVectorView workl = typename DenseVectorView::Engine(nl, worklBuffer);
119
120 const Underscore<IndexType> _;
121 const IndexType n = H.numCols();
122 const T Zero(0), One(1);
123
124 IndexType info = 0;
125
126 //
127 // ==== Decode and check the input parameters. ====
128 //
129 const bool wantT = (job==HSEQR::Schur);
130 const bool initZ = (computeZ==HSEQR::Init);
131 const bool wantZ = (computeZ!=HSEQR::No);
132
133 if (work.length()>0) {
134 work(1) = n;
135 } else {
136 //
137 // ==== Perform and apply a workspace query ====
138 //
139 IndexType lWorkOpt = laqr0_wsq(wantT, wantZ, iLo, iHi, H);
140 work.resize(lWorkOpt);
141 }
142
143 if (n==0) {
144 //
145 // ==== Quick return in case n = 0; nothing to do. ====
146 //
147 return info;
148 }
149 //
150 // ==== copy eigenvalues isolated by DGEBAL ====
151 //
152 for (IndexType i=1; i<=iLo-1; ++i) {
153 wr(i) = H(i,i);
154 wi(i) = Zero;
155 }
156
157 for (IndexType i=iHi+1; i<=n; ++i) {
158 wr(i) = H(i,i);
159 wi(i) = Zero;
160 }
161 //
162 // ==== Initialize Z, if requested ====
163 //
164 if (initZ) {
165 Z = Zero;
166 Z.diag(0) = One;
167 }
168 //
169 // ==== Quick return if possible ====
170 //
171 if (iLo==iHi) {
172 wr(iLo) = H(iLo, iLo);
173 wi(iLo) = Zero;
174 return info;
175 }
176 //
177 // ==== lahqr/laqr0 crossover point ====
178 //
179 char opt[3];
180 _makeHseqrOpt<T>(opt, job, computeZ);
181 IndexType nMin = ilaenv<T>(12, "HSEQR", opt, n, iLo, iHi, work.length());
182
183 nMin= max(nTiny, nMin);
184 //
185 // ==== laqr0 for big matrices; lahqr for small ones ====
186 //
187 if (n>nMin) {
188 info = laqr0(wantT, wantZ, iLo, iHi, H, wr, wi, iLo, iHi, Z, work);
189 } else {
190 //
191 // ==== Small matrix ====
192 //
193 info = lahqr(wantT, wantZ, iLo, iHi, H, wr, wi, iLo, iHi, Z);
194
195 if (info>0) {
196 //
197 // ==== A rare lahqr failure! laqr0 sometimes succeeds
198 // . when lahqr fails. ====
199 //
200 const IndexType kBot = info;
201 if (n>=nl) {
202 //
203 // ==== Larger matrices have enough subdiagonal scratch
204 // . space to call laqr0 directly. ====
205 //
206 info = laqr0(wantT, wantZ,
207 iLo, kBot, H, wr, wi,
208 iLo, iHi, Z, work);
209 } else {
210 //
211 // ==== Tiny matrices don't have enough subdiagonal
212 // . scratch space to benefit from DLAQR0. Hence,
213 // . tiny matrices must be copied into a larger
214 // . array before calling DLAQR0. ====
215 //
216 Hl(_(1,n),_(1,n)) = H;
217 Hl(n+1, n) = Zero;
218 Hl(_(1,nl),_(n+1,nl)) = Zero;
219 info = laqr0(wantT, wantZ,
220 iLo, kBot, Hl, wr, wi,
221 iLo, iHi, Z, workl);
222 if (wantT || (info!=0)) {
223 H = Hl(_(1,n),_(1,n));
224 }
225 }
226 }
227 }
228 //
229 // ==== Clear out the trash, if necessary. ====
230 //
231 if ((wantT || (info!=0)) && (n>2)) {
232 H(_(3,n),_(1,n-2)).lower() = Zero;
233 }
234 //
235 // ==== Ensure reported workspace size is backward-compatible with
236 // . previous LAPACK versions. ====
237 //
238 work(1) = max(T(max(IndexType(1),n)), work(1));
239 return info;
240 }
241
242 //== interface for native lapack ===============================================
243
244 #ifdef CHECK_CXXLAPACK
245
246 template <typename IndexType, typename MH>
247 IndexType
248 hseqr_native_wsq(HSEQR::Job job,
249 HSEQR::ComputeZ computeZ,
250 IndexType iLo,
251 IndexType iHi,
252 const GeMatrix<MH> &H)
253 {
254 typedef typename GeMatrix<MH>::ElementType T;
255
256 const char JOB = getF77LapackChar<HSEQR::Job>(job);
257 const char COMPZ = getF77LapackChar<HSEQR::ComputeZ>(computeZ);
258 const INTEGER N = H.numCols();
259 const INTEGER ILO = iLo;
260 const INTEGER IHI = iHi;
261 const INTEGER LDH = H.leadingDimension();
262 const INTEGER LDZ = H.leadingDimension();
263 T WORK = 0;
264 T DUMMY = 0;
265 const INTEGER LWORK = -1;
266 INTEGER INFO;
267
268 if (IsSame<T,DOUBLE>::value) {
269 LAPACK_IMPL(dhseqr)(&JOB,
270 &COMPZ,
271 &N,
272 &ILO,
273 &IHI,
274 &DUMMY,
275 &LDH,
276 &DUMMY,
277 &DUMMY,
278 &DUMMY,
279 &LDZ,
280 &WORK,
281 &LWORK,
282 &INFO);
283 } else {
284 ASSERT(0);
285 }
286
287 # ifndef NDEBUG
288 if (INFO!=0) {
289 std::cerr << "INFO = " << INFO << std::endl;
290 ASSERT(INFO==0);
291 }
292 # endif
293
294 return WORK;
295 }
296
297 template <typename IndexType, typename MH, typename VWR, typename VWI,
298 typename MZ, typename VWORK>
299 IndexType
300 hseqr_native(HSEQR::Job job,
301 HSEQR::ComputeZ computeZ,
302 IndexType iLo,
303 IndexType iHi,
304 GeMatrix<MH> &H,
305 DenseVector<VWR> &wr,
306 DenseVector<VWI> &wi,
307 GeMatrix<MZ> &Z,
308 DenseVector<VWORK> &work)
309 {
310 typedef typename GeMatrix<MH>::ElementType T;
311
312 const char JOB = getF77LapackChar<HSEQR::Job>(job);
313 const char COMPZ = getF77LapackChar<HSEQR::ComputeZ>(computeZ);
314 const INTEGER N = H.numCols();
315 const INTEGER ILO = iLo;
316 const INTEGER IHI = iHi;
317 const INTEGER LDH = H.leadingDimension();
318 const INTEGER LDZ = Z.leadingDimension();
319 const INTEGER LWORK = work.length();
320 INTEGER INFO;
321
322 if (IsSame<T,DOUBLE>::value) {
323 LAPACK_IMPL(dhseqr)(&JOB,
324 &COMPZ,
325 &N,
326 &ILO,
327 &IHI,
328 H.data(),
329 &LDH,
330 wr.data(),
331 wi.data(),
332 Z.data(),
333 &LDZ,
334 work.data(),
335 &LWORK,
336 &INFO);
337 } else {
338 ASSERT(0);
339 }
340 ASSERT(INFO>=0);
341
342 return INFO;
343 }
344
345 #endif // CHECK_CXXLAPACK
346
347 //== public interface ==========================================================
348
349 template <typename IndexType, typename MH>
350 IndexType
351 hseqr_wsq(HSEQR::Job job,
352 HSEQR::ComputeZ computeZ,
353 IndexType iLo,
354 IndexType iHi,
355 const GeMatrix<MH> &H)
356 {
357 LAPACK_DEBUG_OUT("hseqr_wsq");
358
359 //
360 // Test the input parameters
361 //
362 ASSERT(H.firstRow()==1);
363 ASSERT(H.firstCol()==1);
364 ASSERT(H.numRows()==H.numCols());
365
366 //
367 // Call implementation
368 //
369 IndexType info = hseqr_generic_wsq(job, computeZ, iLo, iHi, H);
370
371 # ifdef CHECK_CXXLAPACK
372 //
373 // Compare results
374 //
375 IndexType _info = hseqr_native_wsq(job, computeZ, iLo, iHi, H);
376
377 if (info!=_info) {
378 std::cerr << "CXXLAPACK: info = " << info << std::endl;
379 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
380 ASSERT(0);
381 }
382 # endif
383 return info;
384 }
385
386 template <typename IndexType, typename MH, typename VWR, typename VWI,
387 typename MZ, typename VWORK>
388 IndexType
389 hseqr(HSEQR::Job job,
390 HSEQR::ComputeZ computeZ,
391 IndexType iLo,
392 IndexType iHi,
393 GeMatrix<MH> &H,
394 DenseVector<VWR> &wr,
395 DenseVector<VWI> &wi,
396 GeMatrix<MZ> &Z,
397 DenseVector<VWORK> &work)
398 {
399 LAPACK_DEBUG_OUT("hseqr");
400
401 //
402 // Test the input parameters
403 //
404 ASSERT(H.firstRow()==1);
405 ASSERT(H.firstCol()==1);
406 ASSERT(H.numRows()==H.numCols());
407 ASSERT(wr.firstIndex()==1);
408 ASSERT(wr.length()==H.numCols());
409 ASSERT(wi.firstIndex()==1);
410 ASSERT(wi.length()==H.numCols());
411 ASSERT((computeZ==HSEQR::No) || (Z.numRows()==H.numCols()));
412 ASSERT((computeZ==HSEQR::No) || (Z.numCols()==H.numCols()));
413 ASSERT((work.length()==0) || (work.length()>=H.numCols()));
414
415 # ifdef CHECK_CXXLAPACK
416 //
417 // Make copies of output arguments
418 //
419 typename GeMatrix<MH>::NoView H_org = H;
420
421 typename GeMatrix<MH>::NoView _H = H;
422 typename DenseVector<VWR>::NoView _wr = wr;
423 typename DenseVector<VWI>::NoView _wi = wi;
424 typename GeMatrix<MZ>::NoView _Z = Z;
425 typename DenseVector<VWORK>::NoView _work = work;
426 # endif
427
428 //
429 // Call implementation
430 //
431 IndexType info = hseqr_generic(job, computeZ, iLo, iHi, H,
432 wr, wi, Z, work);
433
434 # ifdef CHECK_CXXLAPACK
435 //
436 // Compare results
437 //
438 // TODO: also check workspace query directly!
439 if (_work.length()==0) {
440 _work.resize(work.length());
441 }
442
443 IndexType _info = hseqr_native(job, computeZ, iLo, iHi, _H,
444 _wr, _wi, _Z, _work);
445
446 bool failed = false;
447 if (! isIdentical(H, _H, " H", "_H")) {
448 std::cerr << "CXXLAPACK: H = " << H << std::endl;
449 std::cerr << "F77LAPACK: _H = " << _H << std::endl;
450 failed = true;
451 }
452
453 if (! isIdentical(wr, _wr, " wr", "_wr")) {
454 std::cerr << "CXXLAPACK: wr = " << wr << std::endl;
455 std::cerr << "F77LAPACK: _wr = " << _wr << std::endl;
456 failed = true;
457 }
458
459 if (! isIdentical(wi, _wi, " wi", "_wi")) {
460 std::cerr << "CXXLAPACK: wi = " << wi << std::endl;
461 std::cerr << "F77LAPACK: _wi = " << _wi << std::endl;
462 failed = true;
463 }
464
465 if (! isIdentical(Z, _Z, " Z", "_Z")) {
466 std::cerr << "CXXLAPACK: Z = " << Z << std::endl;
467 std::cerr << "F77LAPACK: _Z = " << _Z << std::endl;
468 failed = true;
469 }
470
471 if (! isIdentical(info, _info, " info", "_info")) {
472 std::cerr << "CXXLAPACK: info = " << info << std::endl;
473 std::cerr << "F77LAPACK: _info = " << _info << std::endl;
474 failed = true;
475 }
476
477 if (! isIdentical(work, _work, " work", "_work")) {
478 std::cerr << "CXXLAPACK: work = " << work << std::endl;
479 std::cerr << "F77LAPACK: _work = " << _work << std::endl;
480 failed = true;
481 }
482
483 if (failed) {
484 std::cerr << "H_org = " << H_org << std::endl;
485
486 std::cerr << "job = " << job
487 << ", computeZ = " << computeZ
488 << ", iLo = " << iLo
489 << ", iHi = " << iHi
490 << ", H.numRows() = " << H.numRows()
491 << std::endl;
492 ASSERT(0);
493 }
494 # endif
495 return info;
496 }
497
498 //-- forwarding ----------------------------------------------------------------
499 template <typename IndexType, typename MH>
500 IndexType
501 hseqr_wsq(HSEQR::Job job,
502 HSEQR::ComputeZ computeZ,
503 IndexType iLo,
504 IndexType iHi,
505 const MH &&H)
506 {
507 CHECKPOINT_ENTER;
508 const IndexType result = hseqr_wsq(job, computeZ, iLo, iHi, H);
509 CHECKPOINT_LEAVE;
510
511 return result;
512 }
513
514 template <typename IndexType, typename MH, typename VWR, typename VWI,
515 typename MZ, typename VWORK>
516 IndexType
517 hseqr(HSEQR::Job job,
518 HSEQR::ComputeZ computeZ,
519 IndexType iLo,
520 IndexType iHi,
521 MH &&H,
522 VWR &&wr,
523 VWI &&wi,
524 MZ &&Z,
525 VWORK &&work)
526 {
527 CHECKPOINT_ENTER;
528 const IndexType result = hseqr(job, computeZ, iLo, iHi, H, wr, wi, Z, work);
529 CHECKPOINT_LEAVE;
530
531 return result;
532 }
533
534 } } // namespace lapack, flens
535
536 #endif // FLENS_LAPACK_EIG_HSEQR_TCC