1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35       SUBROUTINE DORM2R( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
 36      $                   WORK, INFO )
 37  *
 38  *  -- LAPACK routine (version 3.3.1) --
 39  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 40  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 41  *  -- April 2011                                                      --
 42  */
 43 
 44 #ifndef FLENS_LAPACK_LQ_ORML2_TCC
 45 #define FLENS_LAPACK_LQ_ORML2_TCC 1
 46 
 47 #include <flens/blas/blas.h>
 48 #include <flens/lapack/lapack.h>
 49 
 50 namespace flens { namespace lapack {
 51 
 52 //== generic lapack implementation =============================================
 53 template <typename MA, typename VTAU, typename MC, typename VWORK>
 54 void
 55 orml2_generic(Side side, Transpose trans, GeMatrix<MA> &A,
 56               const DenseVector<VTAU> &tau, GeMatrix<MC> &C,
 57               DenseVector<VWORK> &work)
 58 {
 59     typedef typename GeMatrix<MC>::IndexType    IndexType;
 60     typedef typename GeMatrix<MC>::ElementType  T;
 61 
 62     typedef Range<IndexType>    Range;
 63     const Underscore<IndexType> _;
 64 
 65     const IndexType m = C.numRows();
 66     const IndexType n = C.numCols();
 67     const IndexType k = A.numRows();
 68 
 69     const T  One(1);
 70 
 71     const bool noTrans = ((trans==Trans) || (trans==ConjTrans)) ? false
 72                                                                 : true;
 73 //
 74 //  nq is the order of Q
 75 //
 76     const IndexType nq = (side==Left) ? m : n;
 77 
 78 //
 79 //  Quick return if possible
 80 //
 81     if ((m==0) || (n==0) || (k==0)) {
 82         return;
 83     }
 84 
 85     IndexType iBeg, iEnd, iInc;
 86     if ((side==Left && noTrans) || (side==Right && !noTrans)) {
 87         iBeg = 1;
 88         iEnd = k;
 89         iInc = 1;
 90     } else {
 91         iBeg = k;
 92         iEnd = 1;
 93         iInc = -1;
 94     }
 95     iEnd += iInc;
 96 
 97     Range rows = _(1,m);
 98     Range cols = _(1,n);
 99 
100     for (IndexType i=iBeg; i!=iEnd; i+=iInc) {
101         if (side==Left) {
102 //
103 //          H(i) is applied to C(i:m,1:n)
104 //
105             rows = _(i,m);
106         } else {
107 //
108 //          H(i) is applied to C(1:m,i:n)
109 //
110             cols = _(i,n);
111         }
112 //
113 //      Apply H(i)
114 //
115         const T Aii = A(i,i);
116         A(i,i) = One;
117         larf(side, A(i,_(i,nq)), tau(i), C(rows, cols), work);
118         A(i,i) = Aii;
119     }
120 }
121 
122 //== interface for native lapack ===============================================
123 
124 #ifdef CHECK_CXXLAPACK
125 
126 template <typename MA, typename VTAU, typename MC, typename VWORK>
127 void
128 orml2_native(Side side, Transpose trans, GeMatrix<MA> &A,
129              const DenseVector<VTAU> &tau, GeMatrix<MC> &C,
130              DenseVector<VWORK> &work)
131 {
132     typedef typename GeMatrix<MC>::ElementType    T;
133 
134     const char      SIDE  = char(side);
135     const char      TRANS = getF77LapackChar(trans);
136     const INTEGER   M = C.numRows();
137     const INTEGER   N = C.numCols();
138     const INTEGER   K = A.numRows();
139     const INTEGER   LDA = A.leadingDimension();
140     const INTEGER   LDC = C.leadingDimension();
141     INTEGER         INFO;
142 
143     if (IsSame<T,DOUBLE>::value) {
144         LAPACK_DECL(dorml2)(&SIDE,
145                             &TRANS,
146                             &M,
147                             &N,
148                             &K,
149                             A.data(),
150                             &LDA,
151                             tau.data(),
152                             C.data(),
153                             &LDC,
154                             work.data(),
155                             &INFO);
156     } else {
157         ASSERT(0);
158     }
159     ASSERT(INFO==0);
160 }
161 
162 #endif // CHECK_CXXLAPACK
163 
164 //== public interface ==========================================================
165 
166 template <typename MA, typename VTAU, typename MC, typename VWORK>
167 void
168 orml2(Side side, Transpose trans, GeMatrix<MA> &A,
169       const DenseVector<VTAU> &tau, GeMatrix<MC> &C,
170       DenseVector<VWORK> &work)
171 {
172     typedef typename GeMatrix<MC>::IndexType    IndexType;
173 
174 //
175 //  Test the input parameters
176 //
177 #   ifndef NDEBUG
178     const IndexType m = C.numRows();
179     const IndexType n = C.numCols();
180     const IndexType k = A.numRows();
181 
182     const IndexType nq = (side==Left) ? m : n;
183 
184     ASSERT(tau.length()==k);
185 
186     ASSERT(k<=nq);
187     ASSERT(A.numRows()==k);
188     ASSERT(A.numCols()==nq);
189 
190     if (work.length()>0) {
191         if (side==Left) {
192             ASSERT(work.length()==n);
193         } else {
194             ASSERT(work.length()==m);
195         }
196     }
197 #   endif
198 //
199 //  Make copies of output arguments
200 //
201 #   ifdef CHECK_CXXLAPACK
202     typename GeMatrix<MA>::NoView       A_org      = A;
203     typename GeMatrix<MC>::NoView       C_org      = C;
204     typename DenseVector<VWORK>::NoView work_org   = work;
205 #   endif
206 
207 //
208 //  Call implementation
209 //
210     orml2_generic(side, trans, A, tau, C, work);
211 
212 #   ifdef CHECK_CXXLAPACK
213 //
214 //  Make copies of results computed by the generic implementation
215 //
216     typename GeMatrix<MA>::NoView       A_generic       = A;
217     typename GeMatrix<MC>::NoView       C_generic       = C;
218     typename DenseVector<VWORK>::NoView work_generic    = work;
219 
220 //
221 //  restore output arguments
222 //
223     A = A_org;
224     C = C_org;
225     work = work_org;
226 
227 //
228 //  Compare generic results with results from the native implementation
229 //
230     orml2_native(side, trans, A, tau, C, work);
231 
232     bool failed = false;
233     if (! isIdentical(A_generic, A, "A_generic""A")) {
234         std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
235         std::cerr << "F77LAPACK: A = " << A << std::endl;
236         failed = true;
237     }
238     if (! isIdentical(C_generic, C, "C_generic""C")) {
239         std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
240         std::cerr << "F77LAPACK: C = " << C << std::endl;
241         failed = true;
242     }
243     if (! isIdentical(work_generic, work, "work_generic""work")) {
244         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
245         std::cerr << "F77LAPACK: work = " << work << std::endl;
246         failed = true;
247     }
248 
249     if (failed) {
250         std::cerr << "error in: orml2.tcc" << std::endl;
251         ASSERT(0);
252     } else {
253         // std::cerr << "passed: orml2.tcc" << std::endl;
254     }
255 #   endif
256 
257 }
258 
259 //-- forwarding ----------------------------------------------------------------
260 template <typename MA, typename VTAU, typename MC, typename VWORK>
261 void
262 orml2(Side side, Transpose trans, MA &&A, const VTAU &tau, MC &&C,
263       VWORK &&work)
264 {
265     CHECKPOINT_ENTER;
266     orml2(side, trans, A, tau, C, work);
267     CHECKPOINT_LEAVE;
268 }
269 
270 
271 } } // namespace lapack, flens
272 
273 #endif // FLENS_LAPACK_LQ_ORML2_TCC