1 /*
  2  *   Copyright (c) 2012, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DORGLQ( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
 36  *
 37  *  -- LAPACK routine (version 3.3.1) --
 38  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 39  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 40  *  -- April 2011                                                      --
 41  */
 42 
 43 #ifndef FLENS_LAPACK_LQ_ORGLQ_TCC
 44 #define FLENS_LAPACK_LQ_ORGLQ_TCC 1
 45 
 46 #include <flens/blas/blas.h>
 47 #include <flens/lapack/lapack.h>
 48 
 49 namespace flens { namespace lapack {
 50 
 51 //== generic lapack implementation =============================================
 52 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
 53 void
 54 orglq_generic(IndexType k, GeMatrix<MA> &A, const DenseVector<VTAU> &tau,
 55               DenseVector<VWORK> &work)
 56 {
 57     using std::max;
 58     using std::min;
 59 
 60     typedef typename GeMatrix<MA>::ElementType  T;
 61 
 62     const Underscore<IndexType> _;
 63     const IndexType m = A.numRows();
 64     const IndexType n = A.numCols();
 65 
 66     const T Zero(0);
 67 
 68 //
 69 //  Perform and apply workspace query
 70 //
 71     IndexType nb = ilaenv<T>(1"ORGLQ""", m, n, k);
 72     const IndexType lWorkOpt = max(IndexType(1), m) * nb;
 73     if (work.length()==0) {
 74         work.resize(max(lWorkOpt,IndexType(1)));
 75         work(1)=lWorkOpt;
 76     }
 77 //
 78 //  Quick return if possible
 79 //
 80     if (m<=0) {
 81         work(1) = 1;
 82         return;
 83     }
 84 
 85     IndexType nbMin = 2;
 86     IndexType nx = 0;
 87     IndexType iws = m;
 88     IndexType ldWork = m;
 89 
 90     if ((nb>1) && (nb<k)) {
 91 //
 92 //      Determine when to cross over from blocked to unblocked code.
 93 //
 94         nx = max(IndexType(0), IndexType(ilaenv<T>(3"ORGLQ""", m, n, k)));
 95         if (nx<k) {
 96 //
 97 //          Determine if workspace is large enough for blocked code.
 98 //
 99             ldWork = m;
100             iws = ldWork *nb;
101             if (work.length()<iws) {
102 //
103 //              Not enough workspace to use optimal NB:  reduce NB and
104 //              determine the minimum value of NB.
105 //
106                 nb = work.length() / ldWork;
107                 nbMin = max(IndexType(2),
108                             IndexType(ilaenv<T>(2"ORGLQ""", m, n, k)));
109             }
110         }
111     }
112 
113     IndexType ki = -1,
114               kk = -1;
115 
116     if ((nb>=nbMin) && (nb<k) && (nx<k)) {
117 //
118 //      Use blocked code after the last block.
119 //      The first kk columns are handled by the block method.
120 //
121         ki = ((k-nx-1)/nb)*nb;
122         kk = min(k, ki+nb);
123 //
124 //      Set A(kk+1:m,1:kk) to zero.
125 //
126         A(_(kk+1,m),_(1,kk)) = Zero;
127     } else {
128         kk = 0;
129     }
130 
131 //
132 //  Use unblocked code for the last or only block.
133 //
134     if (kk<n) {
135         orgl2(k-kk, A(_(kk+1,m),_(kk+1,n)), tau(_(kk+1, k)), work(_(1,m-kk)));
136     }
137 
138     if (kk>0) {
139         typename GeMatrix<MA>::View Work(m, nb, work);
140 //
141 //      Use blocked code
142 //
143         for (IndexType i=ki+1; i>=1; i-=nb) {
144             const IndexType ib = min(nb, k-i+1);
145             if (i+ib<=m) {
146 //
147 //              Form the triangular factor of the block reflector
148 //              H = H(i) H(i+1) . . . H(i+ib-1)
149 //
150                 auto Tr = Work(_(1,ib),_(1,ib)).upper();
151                 larft(Forward, RowWise,
152                       n-i+1,
153                       A(_(i,i+ib-1),_(i,n)),
154                       tau(_(i,i+ib-1)),
155                       Tr);
156 //
157 //              Apply H to A(i:m,i+ib:n) from the left
158 //
159                 larfb(Right, Trans, Forward, RowWise,
160                       A(_(i,i+ib-1),_(i,n)),
161                       Tr,
162                       A(_(i+ib,m),_(i,n)),
163                       Work(_(ib+1,m),_(1,ib)));
164             }
165 //
166 //          Apply H to rows i:m of current block
167 //
168             orgl2(ib, A(_(i,i+ib-1),_(i,n)), tau(_(i,i+ib-1)), work(_(1,ib)));
169 //
170 //          Set columns 1:i-1 of current block to zero
171 //
172             A(_(i,i+ib-1),_(1,i-1)) = Zero;
173         }
174     }
175     work(1) = iws;
176 }
177 
178 //== interface for native lapack ===============================================
179 
180 #ifdef CHECK_CXXLAPACK
181 
182 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
183 void
184 orglq_native(IndexType k, GeMatrix<MA> &A, const DenseVector<VTAU> &tau,
185              DenseVector<VWORK> &work)
186 {
187     typedef typename  GeMatrix<MA>::ElementType     T;
188 
189     const INTEGER M     = A.numRows();
190     const INTEGER N     = A.numCols();
191     const INTEGER K     = k;
192     const INTEGER LDA   = A.leadingDimension();
193     const INTEGER LWORK = work.length();
194     INTEGER INFO;
195 
196     if (IsSame<T,DOUBLE>::value) {
197         LAPACK_IMPL(dorglq)(&M,
198                             &N,
199                             &K,
200                             A.data(),
201                             &LDA,
202                             tau.data(),
203                             work.data(),
204                             &LWORK,
205                             &INFO);
206     } else {
207         ASSERT(0);
208     }
209     ASSERT(INFO==0);
210 }
211 
212 #endif // CHECK_CXXLAPACK
213 
214 //== public interface ==========================================================
215 
216 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
217 void
218 orglq(IndexType k, GeMatrix<MA> &A, const DenseVector<VTAU> &tau,
219       DenseVector<VWORK> &work)
220 {
221     typedef typename GeMatrix<MA>::ElementType  ElementType;
222 //
223 //  Test the input parameters
224 //
225 #   ifndef NDEBUG
226     ASSERT(A.firstRow()==IndexType(1));
227     ASSERT(A.firstCol()==IndexType(1));
228     ASSERT(tau.firstIndex()==IndexType(1));
229     ASSERT(tau.length()==k);
230     ASSERT((work.length()==0) || (work.length()>=A.numRows()));
231 
232     const IndexType m = A.numRows();
233     const IndexType n = A.numCols();
234 
235     ASSERT(n>=m);
236     ASSERT(m>=k);
237     ASSERT(k>=0);
238 #   endif 
239 
240 //
241 //  Make copies of output arguments
242 //
243 #   ifdef CHECK_CXXLAPACK
244     typename GeMatrix<MA>::NoView       A_org      = A;
245     typename DenseVector<VWORK>::NoView work_org   = work;
246 #   endif
247 
248 //
249 //  Call implementation
250 //
251     orglq_generic(k, A, tau, work);
252 
253 #   ifdef CHECK_CXXLAPACK
254 //
255 //  Restore output arguments
256 //
257     typename GeMatrix<MA>::NoView       A_generic      = A;
258     typename DenseVector<VWORK>::NoView work_generic   = work;
259 
260     A = A_org;
261 
262     if (work_org.length()!=0) {
263         work = work_org;
264     } else {
265         work = ElementType(0);
266     }
267 //
268 //  Compare results
269 //
270     orglq_native(k, A, tau, work);
271 
272     bool failed = false;
273     if (! isIdentical(A_generic, A, "A_generic""A")) {
274         std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
275         std::cerr << "F77LAPACK: A = " << A << std::endl;
276         failed = true;
277     }
278 
279     if (! isIdentical(work_generic, work, "work_generic""work")) {
280         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
281         std::cerr << "F77LAPACK: work = " << work << std::endl;
282         failed = true;
283     }
284 
285     if (failed) {
286         std::cerr << "error in: orglq.tcc" << std::endl;
287         ASSERT(0);
288     } else {
289 //        std::cerr << "passed: orglq.tcc" << std::endl;
290     }
291 #   endif
292 }
293 
294 //-- forwarding ----------------------------------------------------------------
295 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
296 void
297 orglq(IndexType k, MA &&A, const VTAU &tau, VWORK &&work)
298 {
299     CHECKPOINT_ENTER;
300     orglq(k, A, tau, work);
301     CHECKPOINT_LEAVE;
302 }
303 
304 } } // namespace lapack, flens
305 
306 #endif // FLENS_LAPACK_LQ_ORGLQ_TCC