1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
 36  *
 37  *  -- LAPACK routine (version 3.3.1)                                  --
 38  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 39  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 40  *  -- April 2009                                                      --
 41  */
 42 
 43 #ifndef FLENS_LAPACK_EIG_HRD_TCC
 44 #define FLENS_LAPACK_EIG_HRD_TCC 1
 45 
 46 #include <flens/blas/blas.h>
 47 #include <flens/lapack/lapack.h>
 48 
 49 namespace flens { namespace lapack {
 50 
 51 //== generic lapack implementation =============================================
 52 
 53 template <typename IndexType, typename MA>
 54 IndexType
 55 hrd_generic_wsq(IndexType iLo, IndexType iHi, const GeMatrix<MA> &A)
 56 {
 57     using std::min;
 58 
 59     typedef typename GeMatrix<MA>::ElementType  T;
 60 
 61 //
 62 //  Perform and apply workspace query
 63 //
 64     const IndexType nbMax = 64;
 65     const IndexType n   = A.numCols();
 66 
 67     return n*min(nbMax, IndexType(ilaenv<T>(1,"GEHRD""", n, iLo, iHi)));
 68 }
 69 
 70 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
 71 void
 72 hrd_generic(IndexType           iLo,
 73             IndexType           iHi,
 74             GeMatrix<MA>        &A,
 75             DenseVector<VTAU>   &tau,
 76             DenseVector<VWORK>  &work)
 77 {
 78     using std::max;
 79     using std::min;
 80 
 81     typedef typename GeMatrix<MA>::ElementType  T;
 82     typedef typename GeMatrix<MA>::View         GeView;
 83     typedef typename GeView::Engine             GeViewEngine;
 84 
 85     const Underscore<IndexType> _;
 86 
 87 //
 88 //  Paramters, e.g. maximum block size and buffer on stack for TrMatrix Tr.
 89 //
 90     const T             One(1);
 91     const IndexType     nbMax = 64;
 92     const IndexType     ldt = nbMax + 1;
 93     T                   trBuffer[nbMax*ldt];
 94     GeView              _Tr = GeViewEngine(ldt, nbMax, trBuffer, ldt);
 95 //
 96 //  Perform and apply workspace query
 97 //
 98     const IndexType n = A.numCols();
 99     IndexType       nb = min(nbMax,
100                              IndexType(ilaenv<T>(1,"GEHRD""", n, iLo, iHi)));
101 
102     const IndexType lWorkOpt = n*nb;
103     if (work.length()==0) {
104         work.resize(lWorkOpt);
105         work(1) = lWorkOpt;
106     }
107 //
108 //  Set elements 1:iLo-1 and iHi:n-1 of tau to zero
109 //
110     tau(_(1,iLo-1)) = 0;
111     tau(_(max(IndexType(1),iHi),n-1)) = 0;
112 //
113 //  Quick return if possible
114 //
115     const IndexType nh = iHi - iLo + 1;
116     if (nh<=1) {
117         work(1) = 1;
118         return;
119     }
120 //
121 //  Determine the block size
122 //
123     IndexType nbMin = 2;
124     IndexType iws = 1;
125     IndexType nx = 0;
126 
127     if (nb>1 && nb<nh) {
128 //
129 //      Determine when to cross over from blocked to unblocked code
130 //      (last block is always handled by unblocked code)
131 //
132         nx = max(nb, IndexType(ilaenv<T>(3"GEHRD""", n, iLo, iHi)));
133         if (nx<nh) {
134 //
135 //          Determine if workspace is large enough for blocked code
136 //
137             iws = n*nb;
138             if (work.length()<iws) {
139 //
140 //              Not enough workspace to use optimal NB:  determine the
141 //              minimum value of NB, and reduce NB or force use of
142 //              unblocked code
143 //
144                 nbMin = max(2, ilaenv<T>(2"GEHRD""", n, iLo, iHi));
145                 if (work.length()>=n*nbMin) {
146                     nb = work.length() / n;
147                 } else {
148                     nb = 1;
149                 }
150             }
151         }
152     }
153     IndexType ldWork = n;
154     IndexType i = iLo;
155     if (nb<nbMin || nb>=nh) {
156 //
157 //      Use unblocked code below
158 //
159     } else {
160 //
161 //      Use blocked code
162 //
163         for (i=iLo; i<=iHi-1-nx; i+=nb) {
164             const IndexType ib = min(nb, iHi-i);
165             auto  Tr = _Tr(_(1,ib),_(1,ib)).upper();
166             GeView  Y(iHi, ib, work, ldWork);
167 //
168 //          Reduce columns i:i+ib-1 to Hessenberg form, returning the
169 //          matrices V and T of the block reflector H = I - V*T*V**T
170 //          which performs the reduction, and also the matrix Y = A*V*T
171 //
172             lahr2(i, ib, A(_(1,iHi),_(i,iHi)), tau(_(i,i+ib-1)), Tr, Y);
173 //
174 //          Partition V and the parts of matrix A that get modified
175 //
176             auto V1 = A(_(i+1, i+ib-1),_(i,i+ib-2));
177             auto V2 = A(_(i+ib,   iHi),_(i,i+ib-1));
178 //
179 //          Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
180 //          right, computing  A := A - Y * V**T. V(i+ib,ib-1) must be set
181 //          to 1
182 //
183             T ei = A(i+ib,i+ib-1);
184             A(i+ib,i+ib-1) = One;
185             blas::mm(NoTrans, Trans, -One, Y, V2, One, A(_(1,iHi),_(i+ib,iHi)));
186             A(i+ib,i+ib-1) = ei;
187 //
188 //          Apply the block reflector H to A(1:i,i+1:i+ib-1) from the
189 //          right
190 //
191             blas::mm(Right, Trans, One, V1.lowerUnit(), Y(_(1,i),_(1,ib-1)));
192             A(_(1,i),_(i+1,i+ib-1)) -= Y(_(1,i),_(1,ib-1));
193 //
194 //          Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
195 //          left
196 //
197             GeView  Work(n-i-ib+1, ib, work, ldWork);
198             larfb(Left, Trans, Forward, ColumnWise,
199                   A(_(i+1,iHi),_(i,i+ib-1)),
200                   Tr,
201                   A(_(i+1,iHi),_(i+ib,n)),
202                   Work);
203         }
204     }
205 //
206 //  Use unblocked code to reduce the rest of the matrix
207 //
208     hd2(i, iHi, A, tau, work);
209     work(1) = iws;
210 }
211 
212 //== interface for native lapack ===============================================
213 
214 #ifdef CHECK_CXXLAPACK
215 
216 template <typename IndexType, typename MA>
217 IndexType
218 hrd_native_wsq(IndexType            iLo,
219                IndexType            iHi,
220                const GeMatrix<MA>   &A)
221 {
222     typedef typename GeMatrix<MA>::ElementType  T;
223 
224     const INTEGER    N      = A.numRows();
225     const INTEGER    ILO    = iLo;
226     const INTEGER    IHI    = iHi;
227     const INTEGER    LDA    = A.leadingDimension();
228     T                WORK;
229     T                DUMMY;
230     const INTEGER    LWORK  = -1;
231     INTEGER          INFO;
232 
233     if (IsSame<T, DOUBLE>::value) {
234         LAPACK_IMPL(dgehrd)(&N,
235                             &ILO,
236                             &IHI,
237                             &DUMMY,
238                             &LDA,
239                             &DUMMY,
240                             &WORK,
241                             &LWORK,
242                             &INFO);
243     } else {
244         ASSERT(0);
245     }
246     ASSERT(INFO==0);
247     return WORK;
248 }
249 
250 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
251 void
252 hrd_native(IndexType            iLo,
253            IndexType            iHi,
254            GeMatrix<MA>         &A,
255            DenseVector<VTAU>    &tau,
256            DenseVector<VWORK>   &work)
257 {
258     typedef typename GeMatrix<MA>::ElementType  T;
259 
260     const INTEGER    N      = A.numRows();
261     const INTEGER    ILO    = iLo;
262     const INTEGER    IHI    = iHi;
263     const INTEGER    LDA    = A.leadingDimension();
264     const INTEGER    LWORK  = work.length();
265     INTEGER          INFO;
266 
267     if (IsSame<T, DOUBLE>::value) {
268         LAPACK_IMPL(dgehrd)(&N,
269                             &ILO,
270                             &IHI,
271                             A.data(),
272                             &LDA,
273                             tau.data(),
274                             work.data(),
275                             &LWORK,
276                             &INFO);
277     } else {
278         ASSERT(0);
279     }
280 
281     ASSERT(INFO==0);
282 }
283 
284 #endif // CHECK_CXXLAPACK
285 
286 //== public interface ==========================================================
287 
288 template <typename IndexType, typename MA>
289 IndexType
290 hrd_wsq(IndexType           iLo,
291         IndexType           iHi,
292         const GeMatrix<MA>  &A)
293 {
294     LAPACK_DEBUG_OUT("hrd_wsq");
295 
296 //
297 //  Test the input parameters
298 //
299 #   ifndef NDEBUG
300     ASSERT(A.firstRow()==1);
301     ASSERT(A.firstCol()==1);
302     ASSERT(A.numRows()==A.numCols());
303 #   endif
304 
305 //
306 //  Call implementation
307 //
308     IndexType info = hrd_generic_wsq(iLo, iHi, A);
309 
310 #   ifdef CHECK_CXXLAPACK
311 //
312 //  Compare results
313 //
314     IndexType _info = hrd_native_wsq(iLo, iHi, A);
315 
316     if (! isIdentical(info, _info, " info""_info")) {
317         ASSERT(0);
318     }
319 #   endif
320 
321     return info;
322 }
323 
324 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
325 void
326 hrd(IndexType           iLo,
327     IndexType           iHi,
328     GeMatrix<MA>        &A,
329     DenseVector<VTAU>   &tau,
330     DenseVector<VWORK>  &work)
331 {
332     LAPACK_DEBUG_OUT("hrd");
333 
334     using std::max;
335 //
336 //  Test the input parameters
337 //
338 #   ifndef NDEBUG
339     ASSERT(A.firstRow()==1);
340     ASSERT(A.firstCol()==1);
341     ASSERT(A.numRows()==A.numCols());
342     ASSERT(tau.firstIndex()==1);
343 
344     const IndexType n = A.numCols();
345 
346     if (n==0) {
347         ASSERT(iLo==1);
348         ASSERT(iHi==0);
349     } else {
350         ASSERT(1<=iLo);
351         ASSERT(iLo<=iHi);
352         ASSERT(iHi<=n);
353     }
354 
355     ASSERT(tau.length()==(n-1));
356     const bool lQuery = (work.length()==0);
357     ASSERT(lQuery || (work.length()>=max(IndexType(1),n)));
358 #   endif
359 
360 //
361 //  Make copies of output arguments
362 //
363 #   ifdef CHECK_CXXLAPACK
364     typename GeMatrix<MA>::NoView       _A = A;
365     typename DenseVector<VTAU>::NoView  _tau = tau;
366     typename DenseVector<VWORK>::NoView _work = work;
367 #   endif
368 
369 //
370 //  Call implementation
371 //
372     hrd_generic(iLo, iHi, A, tau, work);
373 
374 #   ifdef CHECK_CXXLAPACK
375 //
376 //  Compare results
377 //
378     if (_work.length()==0) {
379         _work.resize(work.length());
380     }
381     hrd_native(iLo, iHi, _A, _tau, _work);
382 
383     bool failed = false;
384     if (! isIdentical(A, _A, " A""_A")) {
385         std::cerr << "CXXLAPACK:  A = " << A << std::endl;
386         std::cerr << "F77LAPACK: _A = " << _A << std::endl;
387         failed = true;
388     }
389 
390     if (! isIdentical(tau, _tau, " tau""_tau")) {
391         std::cerr << "CXXLAPACK:  tau = " << tau << std::endl;
392         std::cerr << "F77LAPACK: _tau = " << _tau << std::endl;
393         failed = true;
394     }
395 
396     if (! isIdentical(work, _work, " work""_work")) {
397         std::cerr << "CXXLAPACK:  work = " << work << std::endl;
398         std::cerr << "F77LAPACK: _work = " << _work << std::endl;
399         failed = true;
400     }
401 
402     if (failed) {
403         std::cerr << "error in: hrd.tcc" << std::endl;
404         ASSERT(0);
405     } else {
406 //        std::cerr << "passed: hrd.tcc" << std::endl;
407     }
408 
409 #   endif
410 }
411 
412 //-- forwarding ----------------------------------------------------------------
413 template <typename IndexType, typename MA>
414 IndexType
415 hrd_wsq(IndexType   iLo,
416         IndexType   iHi,
417         const MA    &&A)
418 {
419     CHECKPOINT_ENTER;
420     const IndexType info = hrd_wsq(iLo, iHi, A);
421     CHECKPOINT_LEAVE;
422 
423     return info;
424 }
425 
426 template <typename IndexType, typename MA,
427           typename VTAU, typename VWORK>
428 void
429 hrd(IndexType       iLo,
430     IndexType       iHi,
431     MA              &&A,
432     VTAU            &&tau,
433     VWORK           &&work)
434 {
435     CHECKPOINT_ENTER;
436     hrd(iLo, iHi, A, tau, work);
437     CHECKPOINT_LEAVE;
438 }
439 
440 } } // namespace lapack, flens
441 
442 #endif // FLENS_LAPACK_EIG_HRD_TCC