1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35       SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
 36      $                   WORK, INFO )
 37  *
 38  *  -- LAPACK routine (version 3.3.1) --
 39  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 40  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 41  *  -- April 2011                                                      --
 42  */
 43 
 44 #ifndef FLENS_LAPACK_QR_ORM2R_TCC
 45 #define FLENS_LAPACK_QR_ORM2R_TCC 1
 46 
 47 #include <flens/blas/blas.h>
 48 #include <flens/lapack/lapack.h>
 49 
 50 namespace flens { namespace lapack {
 51 
 52 //== generic lapack implementation =============================================
 53 template <typename MA, typename VTAU, typename MC, typename VWORK>
 54 void
 55 orm2r_generic(Side side, Transpose trans, GeMatrix<MA> &A,
 56               const DenseVector<VTAU> &tau, GeMatrix<MC> &C,
 57               DenseVector<VWORK> &work)
 58 {
 59     typedef typename GeMatrix<MC>::IndexType    IndexType;
 60     typedef typename GeMatrix<MC>::ElementType  T;
 61 
 62     typedef Range<IndexType>    Range;
 63     const Underscore<IndexType> _;
 64 
 65     const IndexType m = C.numRows();
 66     const IndexType n = C.numCols();
 67     const IndexType k = A.numCols();
 68 
 69     const bool noTrans = ((trans==Trans) || (trans==ConjTrans)) ? false
 70                                                                 : true;
 71 //
 72 //  nq is the order of Q
 73 //
 74     const IndexType nq = (side==Left) ? m : n;
 75 
 76     ASSERT(A.numRows()==nq);
 77     ASSERT(k<=nq);
 78 
 79 //
 80 //  Quick return if possible
 81 //
 82     if ((m==0) || (n==0) || (k==0)) {
 83         return;
 84     }
 85 
 86     IndexType iBeg, iEnd, iInc;
 87     if ((side==Left && !noTrans) || (side==Right && noTrans)) {
 88         iBeg = 1;
 89         iEnd = k;
 90         iInc = 1;
 91     } else {
 92         iBeg = k;
 93         iEnd = 1;
 94         iInc = -1;
 95     }
 96     iEnd += iInc;
 97 
 98     Range rows = _(1,m);
 99     Range cols = _(1,n);
100 
101     for (IndexType i=iBeg; i!=iEnd; i+=iInc) {
102         if (side==Left) {
103 //
104 //          H(i) is applied to C(i:m,1:n)
105 //
106             rows = _(i,m);
107         } else {
108 //
109 //          H(i) is applied to C(1:m,i:n)
110 //
111             cols = _(i,n);
112         }
113 //
114 //      Apply H(i)
115 //
116         const T Aii = A(i,i);
117         A(i,i) = T(1);
118         larf(side, A(_(i,nq), i), tau(i), C(rows, cols), work);
119         A(i,i) = Aii;
120     }
121 }
122 
123 //== interface for native lapack ===============================================
124 
125 #ifdef CHECK_CXXLAPACK
126 
127 template <typename MA, typename VTAU, typename MC, typename VWORK>
128 void
129 orm2r_native(Side side, Transpose trans, GeMatrix<MA> &A,
130              const DenseVector<VTAU> &tau, GeMatrix<MC> &C,
131              DenseVector<VWORK> &work)
132 {
133     typedef typename GeMatrix<MC>::ElementType    T;
134 
135     const char      SIDE  = char(side);
136     const char      TRANS = getF77LapackChar(trans);
137     const INTEGER   M = C.numRows();
138     const INTEGER   N = C.numCols();
139     const INTEGER   K = A.numCols();
140     const INTEGER   LDA = A.leadingDimension();
141     const INTEGER   LDC = C.leadingDimension();
142     INTEGER         INFO;
143 
144     if (IsSame<T,DOUBLE>::value) {
145         LAPACK_DECL(dorm2r)(&SIDE,
146                             &TRANS,
147                             &M,
148                             &N,
149                             &K,
150                             A.data(),
151                             &LDA,
152                             tau.data(),
153                             C.data(),
154                             &LDC,
155                             work.data(),
156                             &INFO);
157     } else {
158         ASSERT(0);
159     }
160     ASSERT(INFO==0);
161 }
162 
163 #endif // CHECK_CXXLAPACK
164 
165 //== public interface ==========================================================
166 
167 template <typename MA, typename VTAU, typename MC, typename VWORK>
168 void
169 orm2r(Side side, Transpose trans, GeMatrix<MA> &A,
170       const DenseVector<VTAU> &tau, GeMatrix<MC> &C,
171       DenseVector<VWORK> &work)
172 {
173     typedef typename GeMatrix<MC>::IndexType    IndexType;
174 
175 //
176 //  Test the input parameters
177 //
178 #   ifndef NDEBUG
179     const IndexType m = C.numRows();
180     const IndexType n = C.numCols();
181     const IndexType k = A.numCols();
182 
183     ASSERT(tau.length()==k);
184 
185     if (side==Left) {
186         ASSERT(A.numRows()==m);
187     } else {
188         ASSERT(A.numRows()==n);
189     }
190 
191     if (work.length()>0) {
192         if (side==Left) {
193             ASSERT(work.length()==n);
194         } else {
195             ASSERT(work.length()==m);
196         }
197     }
198 #   endif
199 //
200 //  Make copies of output arguments
201 //
202 #   ifdef CHECK_CXXLAPACK
203     typename GeMatrix<MA>::NoView       A_org      = A;
204     typename GeMatrix<MC>::NoView       C_org      = C;
205     typename DenseVector<VWORK>::NoView work_org   = work;
206 #   endif
207 
208 //
209 //  Call implementation
210 //
211     orm2r_generic(side, trans, A, tau, C, work);
212 
213 #   ifdef CHECK_CXXLAPACK
214 //
215 //  Make copies of results computed by the generic implementation
216 //
217     typename GeMatrix<MA>::NoView       A_generic       = A;
218     typename GeMatrix<MC>::NoView       C_generic       = C;
219     typename DenseVector<VWORK>::NoView work_generic    = work;
220 
221 //
222 //  restore output arguments
223 //
224     A = A_org;
225     C = C_org;
226     work = work_org;
227 
228 //
229 //  Compare generic results with results from the native implementation
230 //
231     orm2r_native(side, trans, A, tau, C, work);
232 
233     bool failed = false;
234     if (! isIdentical(A_generic, A, "A_generic""A")) {
235         std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
236         std::cerr << "F77LAPACK: A = " << A << std::endl;
237         failed = true;
238     }
239     if (! isIdentical(C_generic, C, "C_generic""C")) {
240         std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
241         std::cerr << "F77LAPACK: C = " << C << std::endl;
242         failed = true;
243     }
244     if (! isIdentical(work_generic, work, "work_generic""work")) {
245         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
246         std::cerr << "F77LAPACK: work = " << work << std::endl;
247         failed = true;
248     }
249 
250     if (failed) {
251         std::cerr << "error in: orm2r.tcc" << std::endl;
252         ASSERT(0);
253     } else {
254         // std::cerr << "passed: orm2r.tcc" << std::endl;
255     }
256 #   endif
257 
258 }
259 
260 //-- forwarding ----------------------------------------------------------------
261 template <typename MA, typename VTAU, typename MC, typename VWORK>
262 void
263 orm2r(Side side, Transpose trans, MA &&A, const VTAU &tau, MC &&C,
264       VWORK &&work)
265 {
266     CHECKPOINT_ENTER;
267     orm2r(side, trans, A, tau, C, work);
268     CHECKPOINT_LEAVE;
269 }
270 
271 
272 } } // namespace lapack, flens
273 
274 #endif // FLENS_LAPACK_QR_ORM2R_TCC