1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
 36       $                   WORK, LWORK, INFO )
 37  *
 38  *  -- LAPACK routine (version 3.3.1) --
 39  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 40  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 41  *  -- April 2011                                                      --
 42  */
 43 
 44 #ifndef FLENS_LAPACK_LQ_ORMLQ_TCC
 45 #define FLENS_LAPACK_LQ_ORMLQ_TCC 1
 46 
 47 #include <flens/blas/blas.h>
 48 #include <flens/lapack/lapack.h>
 49 
 50 namespace flens { namespace lapack {
 51 
 52 //== generic lapack implementation =============================================
 53 
 54 template <typename MA, typename MC>
 55 typename GeMatrix<MC>::IndexType
 56 ormlq_generic_wsq(Side              side,
 57                   Transpose         trans,
 58                   GeMatrix<MA>      &A,
 59                   GeMatrix<MC>      &C)
 60 {
 61     using std::max;
 62     using std::min;
 63 
 64     typedef typename GeMatrix<MC>::ElementType  T;
 65     typedef typename GeMatrix<MC>::IndexType    IndexType;
 66 
 67     typedef typename GeMatrix<MC>::View         GeView;
 68     typedef typename GeView::Engine             GeViewEngine;
 69 
 70 //
 71 //  Paramter for maximum block size and buffer for TrMatrix Tr.
 72 //
 73     const IndexType nbMax   = 64;
 74 
 75     const IndexType m = C.numRows();
 76     const IndexType n = C.numCols();
 77     const IndexType k = A.numRows();
 78 
 79     const IndexType nw      = (side==Left) ? n : m;
 80 
 81 //
 82 //  Determine the block size.  nb may be at most nbMax, where nbMax
 83 //  is used to define the local array tr.
 84 //
 85     char opt[3];
 86     opt[0] = char(side);
 87     if (trans==NoTrans) {
 88         opt[1] = 'N';
 89     } else if (trans==Conj) {
 90         opt[1] = 'R';
 91     } else if (trans==Trans) {
 92         opt[1] = 'T';
 93     } else if (trans==ConjTrans) {
 94         opt[1] = 'C';
 95     }
 96     opt[2] = 0;
 97 
 98     IndexType nb = min(nbMax, IndexType(ilaenv<T>(1"ORMLQ", opt, m, n, k)));
 99     return max(IndexType(1), nw)*nb;
100 }
101 
102 template <typename MA, typename VTAU, typename MC, typename VWORK>
103 void
104 ormlq_generic(Side                      side,
105               Transpose                 trans,
106               GeMatrix<MA>              &A,
107               const DenseVector<VTAU>   &tau,
108               GeMatrix<MC>              &C,
109               DenseVector<VWORK>        &work)
110 {
111     using std::max;
112     using std::min;
113 
114     typedef typename GeMatrix<MC>::ElementType  T;
115     typedef typename GeMatrix<MC>::IndexType    IndexType;
116 
117     typedef typename GeMatrix<MC>::View         GeView;
118     typedef typename GeView::Engine             GeViewEngine;
119 
120     const Underscore<IndexType> _;
121 
122 
123 //
124 //  Paramter for maximum block size and buffer for TrMatrix Tr.
125 //
126     const IndexType     nbMax = 64;
127     const IndexType     ldt = nbMax + 1;
128     T                   trBuffer[nbMax*ldt];
129 
130     const IndexType m = C.numRows();
131     const IndexType n = C.numCols();
132     const IndexType k = A.numRows();
133 
134     const bool noTrans = (trans==Trans || trans==ConjTrans) ? false : true;
135 //
136 //  nq is the order of Q and nw is the minimum dimension of work
137 //
138     IndexType nq, nw;
139     if (side==Left) {
140         nq = m;
141         nw = n;
142     } else {
143         nq = n;
144         nw = m;
145     }
146 //
147 //  Determine the block size.  nb may be at most nbMax, where nbMax
148 //  is used to define the local array tr.
149 //
150     char opt[3];
151     opt[0] = (side==Left) ? 'L' : 'R';
152     if (trans==NoTrans) {
153         opt[1] = 'N';
154     } else if (trans==Conj) {
155         opt[1] = 'R';
156     } else if (trans==Trans) {
157         opt[1] = 'T';
158     } else if (trans==ConjTrans) {
159         opt[1] = 'C';
160     }
161     opt[2] = 0;
162 
163     IndexType nb = min(nbMax, IndexType(ilaenv<T>(1"ORMLQ", opt, m, n, k)));
164     IndexType lWorkOpt = max(IndexType(1), nw)*nb;
165 
166     if (work.length()==0) {
167         work.resize(lWorkOpt);
168     }
169 
170 //
171 //  Quick return if possible
172 //
173     if ((m==0) || (n==0) || (k==0)) {
174         work(1) = 1;
175         return;
176     }
177 
178     IndexType nbMin = 2;
179     IndexType iws;
180     if ((nb>1) && (nb<k)) {
181         iws = lWorkOpt;
182         if (work.length()<iws) {
183             nb = work.length()/nw;
184             nbMin = max(nbMin, IndexType(ilaenv<T>(2"ORMLQ", opt, m, n, k)));
185         }
186     } else {
187         iws = nw;
188     }
189 
190     if ((nb<nbMin) || (nb>=k)) {
191 //
192 //      Use unblocked code
193 //
194         auto _work = (side==Left) ? work(_(1,n)) : work(_(1,m));
195         orml2(side, trans, A, tau, C, _work);
196     } else {
197 //
198 //      Use blocked code
199 //
200         IndexType iBeg, iInc, iEnd;
201         if ((side==Left && noTrans) || (side==Right && !noTrans)) {
202             iBeg = 1;
203             iEnd = ((k-1)/nb)*nb + 1;
204             iInc = nb;
205         } else {
206             iBeg = ((k-1)/nb)*nb + 1;
207             iEnd = 1;
208             iInc = -nb;
209         }
210         iEnd += iInc;
211 
212         IndexType ic, jc;
213         if (side==Left) {
214             jc = 1;
215         } else {
216             ic = 1;
217         }
218 
219         const Transpose transT = (trans==NoTrans) ? Trans : NoTrans;
220 
221         typename GeMatrix<MA>::View Work(nw, nb, work);
222 
223         for (IndexType i=iBeg; i!=iEnd; i+=iInc) {
224             const IndexType ib = min(nb, k-i+1);
225             GeView          Tr = GeViewEngine(ib, ib, trBuffer, ldt);
226 //
227 //          Form the triangular factor of the block reflector
228 //          H = H(i) H(i+1) . . . H(i+ib-1)
229 //
230             larft(Forward, RowWise, nq-i+1,
231                   A(_(i,i+ib-1),_(i,nq)), tau(_(i,i+ib-1)), Tr.upper());
232 
233             if (side==Left) {
234 //
235 //              H or H**T is applied to C(i:m,1:n)
236 //
237                 ic = i;
238             } else {
239 //
240 //              H or H**T is applied to C(1:m,i:n)
241 //
242                 jc = i;
243             }
244 
245             larfb(side, transT,
246                   Forward, RowWise,
247                   A(_(i,i+ib-1),_(i,nq)), Tr.upper(),
248                   C(_(ic,m),_(jc,n)),
249                   Work(_,_(1,ib)));
250         }
251     }
252     work(1) = lWorkOpt;
253 }
254 
255 //== interface for native lapack ===============================================
256 
257 #ifdef CHECK_CXXLAPACK
258 
259 template <typename MA, typename MC>
260 typename GeMatrix<MC>::IndexType
261 ormlq_native_wsq(Side              side,
262                  Transpose         trans,
263                  GeMatrix<MA>      &A,
264                  GeMatrix<MC>      &C)
265 {
266     typedef typename GeMatrix<MC>::ElementType  T;
267 
268     const char      SIDE    = char(side);
269     const char      TRANS   = getF77LapackChar(trans);
270     const INTEGER   M       = C.numRows();
271     const INTEGER   N       = C.numCols();
272     const INTEGER   K       = A.numRows();
273     const INTEGER   LDA     = A.leadingDimension();
274     const INTEGER   LDC     = C.leadingDimension();
275     T               WORK, DUMMY;
276     const INTEGER   LWORK   = -1;
277     INTEGER         INFO;
278 
279     if (IsSame<T,DOUBLE>::value) {
280         LAPACK_IMPL(dormlq)(&SIDE,
281                             &TRANS,
282                             &M,
283                             &N,
284                             &K,
285                             A.data(),
286                             &LDA,
287                             &DUMMY,
288                             C.data(),
289                             &LDC,
290                             &WORK,
291                             &LWORK,
292                             &INFO);
293     } else {
294         ASSERT(0);
295     }
296 
297     ASSERT(INFO>=0);
298     return WORK;
299 }
300 
301 template <typename MA, typename VTAU, typename MC, typename VWORK>
302 void
303 ormlq_native(Side                       side,
304              Transpose                  trans,
305              GeMatrix<MA>               &A,
306              const DenseVector<VTAU>    &tau,
307              GeMatrix<MC>               &C,
308              DenseVector<VWORK>         &work)
309 {
310     typedef typename GeMatrix<MC>::ElementType  T;
311 
312     const char      SIDE    = char(side);
313     const char      TRANS   = getF77LapackChar(trans);
314     const INTEGER   M       = C.numRows();
315     const INTEGER   N       = C.numCols();
316     const INTEGER   K       = A.numRows();
317     const INTEGER   LDA     = A.leadingDimension();
318     const INTEGER   LDC     = C.leadingDimension();
319     const INTEGER   LWORK   = work.length();
320     INTEGER         INFO;
321 
322     if (IsSame<T,DOUBLE>::value) {
323         LAPACK_IMPL(dormlq)(&SIDE,
324                             &TRANS,
325                             &M,
326                             &N,
327                             &K,
328                             A.data(),
329                             &LDA,
330                             tau.data(),
331                             C.data(),
332                             &LDC,
333                             work.data(),
334                             &LWORK,
335                             &INFO);
336     } else {
337         ASSERT(0);
338     }
339 
340     ASSERT(INFO>=0);
341 }
342 
343 #endif // CHECK_CXXLAPACK
344 
345 //== public interface ==========================================================
346 
347 template <typename MA, typename MC>
348 typename GeMatrix<MC>::IndexType
349 ormlq_wsq(Side              side,
350           Transpose         trans,
351           GeMatrix<MA>      &A,
352           GeMatrix<MC>      &C)
353 {
354     typedef typename GeMatrix<MC>::IndexType    IndexType;
355 
356 //
357 //  Test the input parameters
358 //
359 #   ifndef NDEBUG
360     const IndexType m = C.numRows();
361     const IndexType n = C.numCols();
362     const IndexType k = A.numRows();
363 
364     if (side==Left) {
365         ASSERT(A.numCols()==m);
366     } else {
367         ASSERT(A.numCols()==n);
368     }
369 #   endif
370 
371 //
372 //  Call implementation
373 //
374     const IndexType info = ormlq_generic(side, trans, A, C);
375 
376 #   ifdef CHECK_CXXLAPACK
377 //
378 //  Compare generic results with results from the native implementation
379 //
380     const IndexType _info = ormlq_native(side, trans, A, C);
381 
382     ASSERT(info==_info);
383 #   endif
384     return info;
385 }
386 
387 template <typename MA, typename VTAU, typename MC, typename VWORK>
388 void
389 ormlq(Side                      side,
390       Transpose                 trans,
391       GeMatrix<MA>              &A,
392       const DenseVector<VTAU>   &tau,
393       GeMatrix<MC>              &C,
394       DenseVector<VWORK>        &work)
395 {
396 //
397 //  Test the input parameters
398 //
399 #   ifndef NDEBUG
400     typedef typename GeMatrix<MC>::IndexType    IndexType;
401 
402     const IndexType m = C.numRows();
403     const IndexType n = C.numCols();
404     const IndexType k = A.numRows();
405 
406     ASSERT(tau.length()==k);
407 
408     if (side==Left) {
409         ASSERT(A.numCols()==m);
410     } else {
411         ASSERT(A.numCols()==n);
412     }
413 
414     if (work.length()>0) {
415         if (side==Left) {
416             ASSERT(work.length()>=n);
417         } else {
418             ASSERT(work.length()>=m);
419         }
420     }
421 #   endif
422 
423 //
424 //  Make copies of output arguments
425 //
426 #   ifdef CHECK_CXXLAPACK
427     typename GeMatrix<MA>::NoView       A_org      = A;
428     typename GeMatrix<MC>::NoView       C_org      = C;
429     typename DenseVector<VWORK>::NoView work_org   = work;
430 #   endif
431 
432 //
433 //  Call implementation
434 //
435     ormlq_generic(side, trans, A, tau, C, work);
436 
437 #   ifdef CHECK_CXXLAPACK
438 //
439 //  Make copies of results computed by the generic implementation
440 //
441     typename GeMatrix<MA>::NoView       A_generic       = A;
442     typename GeMatrix<MC>::NoView       C_generic       = C;
443     typename DenseVector<VWORK>::NoView work_generic    = work;
444 
445 //
446 //  restore output arguments
447 //
448     A = A_org;
449     C = C_org;
450     work = work_org;
451 
452 //
453 //  Compare generic results with results from the native implementation
454 //
455     ormlq_native(side, trans, A, tau, C, work);
456 
457     bool failed = false;
458     if (! isIdentical(A_generic, A, "A_generic""A")) {
459         std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
460         std::cerr << "F77LAPACK: A = " << A << std::endl;
461         failed = true;
462     }
463     if (! isIdentical(C_generic, C, "C_generic""C")) {
464         std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
465         std::cerr << "F77LAPACK: C = " << C << std::endl;
466         failed = true;
467     }
468     if (! isIdentical(work_generic, work, "work_generic""work")) {
469         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
470         std::cerr << "F77LAPACK: work = " << work << std::endl;
471         failed = true;
472     }
473 
474     if (failed) {
475         std::cerr << "error in: ormlq.tcc" << std::endl;
476         ASSERT(0);
477     } else {
478         // std::cerr << "passed: ormlq.tcc" << std::endl;
479     }
480 #   endif
481 }
482 
483 //-- forwarding ----------------------------------------------------------------
484 template <typename MA, typename MC>
485 typename MC::IndexType
486 ormlq_wsq(Side          side,
487           Transpose     trans,
488           MA            &&A,
489           MC            &&C)
490 {
491     typedef typename MC::IndexType IndexType;
492 
493     CHECKPOINT_ENTER;
494     const IndexType info = ormlq_wsq(side, trans, A, C);
495     CHECKPOINT_LEAVE;
496 
497     return info;
498 }
499 
500 template <typename MA, typename VTAU, typename MC, typename VWORK>
501 void
502 ormlq(Side              side,
503       Transpose         trans,
504       MA                &&A,
505       const VTAU        &tau,
506       MC                &&C,
507       VWORK             &&work)
508 {
509     // TODO: asser that A is non-const
510     CHECKPOINT_ENTER;
511     ormlq(side, trans, A, tau, C, work);
512     CHECKPOINT_LEAVE;
513 }
514 
515 } } // namespace lapack, flens
516 
517 #endif // FLENS_LAPACK_LQ_ORMLQ_TCC