1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35       SUBROUTINE DHSEQR( JOB, COMPZ, N, ILO, IHI, H, LDH, WR, WI, Z,
 36      $                   LDZ, WORK, LWORK, INFO )
 37  *
 38  *  -- LAPACK computational routine (version 3.2.2) --
 39  *     Univ. of Tennessee, Univ. of California Berkeley,
 40  *     Univ. of Colorado Denver and NAG Ltd..
 41  *     June 2010
 42  */
 43 
 44 #ifndef FLENS_LAPACK_EIG_HSEQR_TCC
 45 #define FLENS_LAPACK_EIG_HSEQR_TCC 1
 46 
 47 #include <flens/blas/blas.h>
 48 #include <flens/lapack/lapack.h>
 49 
 50 namespace flens { namespace lapack {
 51 
 52 //== generic lapack implementation =============================================
 53 
 54 template <typename T>
 55 void
 56 _makeHseqrOpt(char opt[3], HSEQR::Job job, HSEQR::ComputeZ computeZ)
 57 {
 58     opt[0] = (job==HSEQR::Eigenvalues) ? 'E' : 'S';
 59     if (computeZ==HSEQR::No) {
 60         opt[1] = 'N';
 61     } else if (computeZ==HSEQR::Init) {
 62         opt[1] = 'I';
 63     } else {
 64         opt[1] = 'V';
 65     }
 66     opt[2] = 0;
 67 }
 68 
 69 template <typename IndexType, typename MH>
 70 IndexType
 71 hseqr_generic_wsq(HSEQR::Job            job,
 72                   HSEQR::ComputeZ       computeZ,
 73                   IndexType             iLo,
 74                   IndexType             iHi,
 75                   const GeMatrix<MH>    &H)
 76 {
 77     using std::max;
 78 
 79     IndexType n = H.numRows();
 80     const bool wantT = (job==HSEQR::Schur);
 81     const bool wantZ = (computeZ!=HSEQR::No);
 82 
 83     IndexType info = laqr0_wsq(wantT, wantZ, iLo, iHi, H);
 84     info = max(max(IndexType(1), n), info);
 85     return info;
 86 }
 87 
 88 template <typename IndexType, typename MH, typename VWR, typename VWI,
 89           typename MZ, typename VWORK>
 90 IndexType
 91 hseqr_generic(HSEQR::Job            job,
 92               HSEQR::ComputeZ       computeZ,
 93               IndexType             iLo,
 94               IndexType             iHi,
 95               GeMatrix<MH>          &H,
 96               DenseVector<VWR>      &wr,
 97               DenseVector<VWI>      &wi,
 98               GeMatrix<MZ>          &Z,
 99               DenseVector<VWORK>    &work)
100 {
101     using std::max;
102     using std::min;
103 
104     typedef typename GeMatrix<MH>::ElementType   T;
105     typedef typename GeMatrix<MH>::View          GeMatrixView;
106     typedef typename GeMatrix<MH>::VectorView    DenseVectorView;
107 
108     const IndexType nTiny = 11;
109 //  ==== NL allocates some local workspace to help small matrices
110 //  .    through a rare DLAHQR failure.  NL .GT. NTINY = 11 is
111 //  .    required and NL .LE. NMIN = ILAENV(ISPEC=12,...) is recom-
112 //  .    mended.  (The default value of NMIN is 75.)  Using NL = 49
113 //  .    allows up to six simultaneous shifts and a 16-by-16
114 //  .    deflation window.  ====
115     const           IndexType nl = 49;
116     T               hlBuffer[nl*nl], worklBuffer[nl];
117     GeMatrixView    Hl    = typename GeMatrixView::Engine(nl, nl, hlBuffer, nl);
118     DenseVectorView workl = typename DenseVectorView::Engine(nl, worklBuffer);
119 
120     const Underscore<IndexType>     _;
121     const IndexType                 n = H.numCols();
122     const T                         Zero(0), One(1);
123 
124     IndexType info = 0;
125 
126 //
127 //  ==== Decode and check the input parameters. ====
128 //
129     const bool wantT = (job==HSEQR::Schur);
130     const bool initZ = (computeZ==HSEQR::Init);
131     const bool wantZ = (computeZ!=HSEQR::No);
132 
133     if (work.length()>0) {
134         work(1) = n;
135     } else {
136 //
137 //      ==== Perform and apply a workspace query ====
138 //
139         IndexType lWorkOpt = laqr0_wsq(wantT, wantZ, iLo, iHi, H);
140         work.resize(lWorkOpt);
141     }
142 
143     if (n==0) {
144 //
145 //      ==== Quick return in case n = 0; nothing to do. ====
146 //
147         return info;
148     }
149 //
150 //  ==== copy eigenvalues isolated by DGEBAL ====
151 //
152     for (IndexType i=1; i<=iLo-1; ++i) {
153         wr(i) = H(i,i);
154         wi(i) = Zero;
155     }
156 
157     for (IndexType i=iHi+1; i<=n; ++i) {
158         wr(i) = H(i,i);
159         wi(i) = Zero;
160     }
161 //
162 //  ==== Initialize Z, if requested ====
163 //
164     if (initZ) {
165         Z = Zero;
166         Z.diag(0) = One;
167     }
168 //
169 //  ==== Quick return if possible ====
170 //
171     if (iLo==iHi) {
172         wr(iLo) = H(iLo, iLo);
173         wi(iLo) = Zero;
174         return info;
175     }
176 //
177 //  ==== lahqr/laqr0 crossover point ====
178 //
179     char opt[3];
180     _makeHseqrOpt<T>(opt, job, computeZ);
181     IndexType nMin = ilaenv<T>(12"HSEQR", opt, n, iLo, iHi, work.length());
182 
183     nMin= max(nTiny, nMin);
184 //
185 //  ==== laqr0 for big matrices; lahqr for small ones ====
186 //
187     if (n>nMin) {
188         info = laqr0(wantT, wantZ, iLo, iHi, H, wr, wi, iLo, iHi, Z, work);
189     } else {
190 //
191 //      ==== Small matrix ====
192 //
193         info = lahqr(wantT, wantZ, iLo, iHi, H, wr, wi, iLo, iHi, Z);
194 
195         if (info>0) {
196 //
197 //          ==== A rare lahqr failure!  laqr0 sometimes succeeds
198 //          .    when lahqr fails. ====
199 //
200             const IndexType kBot = info;
201             if (n>=nl) {
202 //
203 //              ==== Larger matrices have enough subdiagonal scratch
204 //              .    space to call laqr0 directly. ====
205 //
206                 info = laqr0(wantT, wantZ,
207                              iLo, kBot, H, wr, wi,
208                              iLo, iHi, Z, work);
209             } else {
210 //
211 //              ==== Tiny matrices don't have enough subdiagonal
212 //              .    scratch space to benefit from DLAQR0.  Hence,
213 //              .    tiny matrices must be copied into a larger
214 //              .    array before calling DLAQR0. ====
215 //
216                 Hl(_(1,n),_(1,n)) = H;
217                 Hl(n+1, n) = Zero;
218                 Hl(_(1,nl),_(n+1,nl)) = Zero;
219                 info = laqr0(wantT, wantZ,
220                              iLo, kBot, Hl, wr, wi,
221                              iLo, iHi, Z, workl);
222                 if (wantT || (info!=0)) {
223                     H = Hl(_(1,n),_(1,n));
224                 }
225             }
226         }
227     }
228 //
229 //  ==== Clear out the trash, if necessary. ====
230 //
231     if ((wantT || (info!=0)) && (n>2)) {
232         H(_(3,n),_(1,n-2)).lower() = Zero;
233     }
234 //
235 //  ==== Ensure reported workspace size is backward-compatible with
236 //  .    previous LAPACK versions. ====
237 //
238     work(1) = max(T(max(IndexType(1),n)), work(1));
239     return info;
240 }
241 
242 //== interface for native lapack ===============================================
243 
244 #ifdef CHECK_CXXLAPACK
245 
246 template <typename IndexType, typename MH>
247 IndexType
248 hseqr_native_wsq(HSEQR::Job         job,
249                  HSEQR::ComputeZ    computeZ,
250                  IndexType          iLo,
251                  IndexType          iHi,
252                  const GeMatrix<MH> &H)
253 {
254     typedef typename GeMatrix<MH>::ElementType  T;
255 
256     const char JOB      = getF77LapackChar<HSEQR::Job>(job);
257     const char COMPZ    = getF77LapackChar<HSEQR::ComputeZ>(computeZ);
258     const INTEGER N     = H.numCols();
259     const INTEGER ILO  = iLo;
260     const INTEGER IHI  = iHi;
261     const INTEGER LDH   = H.leadingDimension();
262     const INTEGER LDZ   = H.leadingDimension();
263     T WORK              = 0;
264     T DUMMY             = 0;
265     const INTEGER LWORK = -1;
266     INTEGER INFO;
267 
268     if (IsSame<T,DOUBLE>::value) {
269         LAPACK_IMPL(dhseqr)(&JOB,
270                             &COMPZ,
271                             &N,
272                             &ILO,
273                             &IHI,
274                             &DUMMY,
275                             &LDH,
276                             &DUMMY,
277                             &DUMMY,
278                             &DUMMY,
279                             &LDZ,
280                             &WORK,
281                             &LWORK,
282                             &INFO);
283     } else {
284         ASSERT(0);
285     }
286 
287 #   ifndef NDEBUG
288     if (INFO!=0) {
289         std::cerr << "INFO = " << INFO << std::endl;
290         ASSERT(INFO==0);
291     }
292 #   endif
293 
294     return WORK;
295 }
296 
297 template <typename IndexType, typename MH, typename VWR, typename VWI,
298           typename MZ, typename VWORK>
299 IndexType
300 hseqr_native(HSEQR::Job            job,
301              HSEQR::ComputeZ       computeZ,
302              IndexType             iLo,
303              IndexType             iHi,
304              GeMatrix<MH>          &H,
305              DenseVector<VWR>      &wr,
306              DenseVector<VWI>      &wi,
307              GeMatrix<MZ>          &Z,
308              DenseVector<VWORK>    &work)
309 {
310     typedef typename GeMatrix<MH>::ElementType  T;
311 
312     const char JOB      = getF77LapackChar<HSEQR::Job>(job);
313     const char COMPZ    = getF77LapackChar<HSEQR::ComputeZ>(computeZ);
314     const INTEGER N     = H.numCols();
315     const INTEGER ILO   = iLo;
316     const INTEGER IHI   = iHi;
317     const INTEGER LDH   = H.leadingDimension();
318     const INTEGER LDZ   = Z.leadingDimension();
319     const INTEGER LWORK = work.length();
320     INTEGER INFO;
321 
322     if (IsSame<T,DOUBLE>::value) {
323         LAPACK_IMPL(dhseqr)(&JOB,
324                             &COMPZ,
325                             &N,
326                             &ILO,
327                             &IHI,
328                             H.data(),
329                             &LDH,
330                             wr.data(),
331                             wi.data(),
332                             Z.data(),
333                             &LDZ,
334                             work.data(),
335                             &LWORK,
336                             &INFO);
337     } else {
338         ASSERT(0);
339     }
340     ASSERT(INFO>=0);
341 
342     return INFO;
343 }
344 
345 #endif // CHECK_CXXLAPACK
346 
347 //== public interface ==========================================================
348 
349 template <typename IndexType, typename MH>
350 IndexType
351 hseqr_wsq(HSEQR::Job            job,
352           HSEQR::ComputeZ       computeZ,
353           IndexType             iLo,
354           IndexType             iHi,
355           const GeMatrix<MH>    &H)
356 {
357     LAPACK_DEBUG_OUT("hseqr_wsq");
358 
359 //
360 //  Test the input parameters
361 //
362     ASSERT(H.firstRow()==1);
363     ASSERT(H.firstCol()==1);
364     ASSERT(H.numRows()==H.numCols());
365 
366 //
367 //  Call implementation
368 //
369     IndexType info = hseqr_generic_wsq(job, computeZ, iLo, iHi, H);
370 
371 #   ifdef CHECK_CXXLAPACK
372 //
373 //  Compare results
374 //
375     IndexType _info =  hseqr_native_wsq(job, computeZ, iLo, iHi, H);
376 
377     if (info!=_info) {
378         std::cerr << "CXXLAPACK:  info = " << info << std::endl;
379         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
380         ASSERT(0);
381     }
382 #   endif
383     return info;
384 }
385 
386 template <typename IndexType, typename MH, typename VWR, typename VWI,
387           typename MZ, typename VWORK>
388 IndexType
389 hseqr(HSEQR::Job                job,
390       HSEQR::ComputeZ           computeZ,
391       IndexType                 iLo,
392       IndexType                 iHi,
393       GeMatrix<MH>              &H,
394       DenseVector<VWR>          &wr,
395       DenseVector<VWI>          &wi,
396       GeMatrix<MZ>              &Z,
397       DenseVector<VWORK>        &work)
398 {
399     LAPACK_DEBUG_OUT("hseqr");
400 
401 //
402 //  Test the input parameters
403 //
404     ASSERT(H.firstRow()==1);
405     ASSERT(H.firstCol()==1);
406     ASSERT(H.numRows()==H.numCols());
407     ASSERT(wr.firstIndex()==1);
408     ASSERT(wr.length()==H.numCols());
409     ASSERT(wi.firstIndex()==1);
410     ASSERT(wi.length()==H.numCols());
411     ASSERT((computeZ==HSEQR::No) || (Z.numRows()==H.numCols()));
412     ASSERT((computeZ==HSEQR::No) || (Z.numCols()==H.numCols()));
413     ASSERT((work.length()==0) || (work.length()>=H.numCols()));
414 
415 #   ifdef CHECK_CXXLAPACK
416 //
417 //  Make copies of output arguments
418 //
419     typename GeMatrix<MH>::NoView           H_org   = H;
420 
421     typename GeMatrix<MH>::NoView           _H      = H;
422     typename DenseVector<VWR>::NoView       _wr     = wr;
423     typename DenseVector<VWI>::NoView       _wi     = wi;
424     typename GeMatrix<MZ>::NoView           _Z      = Z;
425     typename DenseVector<VWORK>::NoView     _work   = work;
426 #   endif
427 
428 //
429 //  Call implementation
430 //
431     IndexType info = hseqr_generic(job, computeZ, iLo, iHi, H,
432                                    wr, wi, Z, work);
433 
434 #   ifdef CHECK_CXXLAPACK
435 //
436 //  Compare results
437 //
438     // TODO: also check workspace query directly!
439     if (_work.length()==0) {
440         _work.resize(work.length());
441     }
442 
443     IndexType _info = hseqr_native(job, computeZ, iLo, iHi, _H,
444                                    _wr, _wi, _Z, _work);
445 
446     bool failed = false;
447     if (! isIdentical(H, _H, " H""_H")) {
448         std::cerr << "CXXLAPACK:  H = " << H << std::endl;
449         std::cerr << "F77LAPACK: _H = " << _H << std::endl;
450         failed = true;
451     }
452 
453     if (! isIdentical(wr, _wr, " wr""_wr")) {
454         std::cerr << "CXXLAPACK:  wr = " << wr << std::endl;
455         std::cerr << "F77LAPACK: _wr = " << _wr << std::endl;
456         failed = true;
457     }
458 
459     if (! isIdentical(wi, _wi, " wi""_wi")) {
460         std::cerr << "CXXLAPACK:  wi = " << wi << std::endl;
461         std::cerr << "F77LAPACK: _wi = " << _wi << std::endl;
462         failed = true;
463     }
464 
465     if (! isIdentical(Z, _Z, " Z""_Z")) {
466         std::cerr << "CXXLAPACK:  Z = " << Z << std::endl;
467         std::cerr << "F77LAPACK: _Z = " << _Z << std::endl;
468         failed = true;
469     }
470 
471     if (! isIdentical(info, _info, " info""_info")) {
472         std::cerr << "CXXLAPACK:  info = " << info << std::endl;
473         std::cerr << "F77LAPACK: _info = " << _info << std::endl;
474         failed = true;
475     }
476 
477     if (! isIdentical(work, _work, " work""_work")) {
478         std::cerr << "CXXLAPACK:  work = " << work << std::endl;
479         std::cerr << "F77LAPACK: _work = " << _work << std::endl;
480         failed = true;
481     }
482 
483     if (failed) {
484         std::cerr << "H_org = " << H_org << std::endl;
485 
486         std::cerr << "job = " << job
487                   << ", computeZ = " << computeZ
488                   << ", iLo = " << iLo
489                   << ", iHi = " << iHi
490                   << ", H.numRows() = " << H.numRows()
491                   << std::endl;
492         ASSERT(0);
493     }
494 #   endif
495     return info;
496 }
497 
498 //-- forwarding ----------------------------------------------------------------
499 template <typename IndexType, typename MH>
500 IndexType
501 hseqr_wsq(HSEQR::Job        job,
502           HSEQR::ComputeZ   computeZ,
503           IndexType         iLo,
504           IndexType         iHi,
505           const MH          &&H)
506 {
507     CHECKPOINT_ENTER;
508     const IndexType result = hseqr_wsq(job, computeZ, iLo, iHi, H);
509     CHECKPOINT_LEAVE;
510 
511     return result;
512 }
513 
514 template <typename IndexType, typename MH, typename VWR, typename VWI,
515           typename MZ, typename VWORK>
516 IndexType
517 hseqr(HSEQR::Job                job,
518       HSEQR::ComputeZ           computeZ,
519       IndexType                 iLo,
520       IndexType                 iHi,
521       MH                        &&H,
522       VWR                       &&wr,
523       VWI                       &&wi,
524       MZ                        &&Z,
525       VWORK                     &&work)
526 {
527     CHECKPOINT_ENTER;
528     const IndexType result = hseqr(job, computeZ, iLo, iHi, H, wr, wi, Z, work);
529     CHECKPOINT_LEAVE;
530 
531     return result;
532 }
533 
534 } } // namespace lapack, flens
535 
536 #endif // FLENS_LAPACK_EIG_HSEQR_TCC