1 /*
  2  *   Copyright (c) 2011, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34       SUBROUTINE DORGQR( M, N, K, A, LDA, TAU, WORK, LWORK, INFO )
 35  *
 36  *  -- LAPACK routine (version 3.2) --
 37  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 38  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 39  *     November 2006
 40  */
 41 
 42 #ifndef FLENS_LAPACK_QR_ORGQR_TCC
 43 #define FLENS_LAPACK_QR_ORGQR_TCC 1
 44 
 45 #include <flens/blas/blas.h>
 46 #include <flens/lapack/lapack.h>
 47 
 48 namespace flens { namespace lapack {
 49 
 50 //== generic lapack implementation =============================================
 51 
 52 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
 53 void
 54 orgqr_generic(IndexType                 k,
 55               GeMatrix<MA>              &A,
 56               const DenseVector<VTAU>   &tau,
 57               DenseVector<VWORK>        &work)
 58 {
 59     using std::max;
 60     using std::min;
 61 
 62     typedef typename GeMatrix<MA>::ElementType  T;
 63     const T  Zero(0);
 64 
 65     const Underscore<IndexType> _;
 66     const IndexType m = A.numRows();
 67     const IndexType n = A.numCols();
 68 //
 69 //  Perform and apply workspace query
 70 //
 71     IndexType nb = ilaenv<T>(1"ORGQR""", m, n, k);
 72     const IndexType lWorkOpt = max(IndexType(1), n) * nb;
 73     if (work.length()==0) {
 74         work.resize(max(lWorkOpt,IndexType(1)));
 75         work(1)=lWorkOpt;
 76     }
 77 //
 78 //  Quick return if possible
 79 //
 80     if (n<=0) {
 81         work(1) = 1;
 82         return;
 83     }
 84 
 85     IndexType nbMin = 2;
 86     IndexType nx = 0;
 87     IndexType iws = n;
 88     IndexType ldWork = 1;
 89 
 90     if ((nb>1) && (nb<k)) {
 91 //
 92 //      Determine when to cross over from blocked to unblocked code.
 93 //
 94         nx = max(IndexType(0), IndexType(ilaenv<T>(3"ORGQR""", m, n, k)));
 95         if (nx<k) {
 96 //
 97 //          Determine if workspace is large enough for blocked code.
 98 //
 99             ldWork = n;
100             iws = ldWork *nb;
101             if (work.length()<iws) {
102 //
103 //              Not enough workspace to use optimal NB:  reduce NB and
104 //              determine the minimum value of NB.
105 //
106                 nb = work.length() / ldWork;
107                 nbMin = max(IndexType(2),
108                             IndexType(ilaenv<T>(2"ORGQR""", m, n, k)));
109             }
110         }
111     }
112 
113     IndexType ki = -1,
114               kk = -1;
115 
116     if ((nb>=nbMin) && (nb<k) && (nx<k)) {
117 //
118 //      Use blocked code after the last block.
119 //      The first kk columns are handled by the block method.
120 //
121         ki = ((k-nx-1)/nb)*nb;
122         kk = min(k, ki+nb);
123 //
124 //      Set A(1:kk,kk+1:n) to zero.
125 //
126         A(_(1,kk),_(kk+1,n)) = Zero;
127     } else {
128         kk = 0;
129     }
130 
131 //
132 //  Use unblocked code for the last or only block.
133 //
134     if (kk<n) {
135         org2r(k-kk, A(_(kk+1,m),_(kk+1,n)), tau(_(kk+1, k)), work(_(1,n-kk)));
136     }
137 
138     if (kk>0) {
139         typename GeMatrix<MA>::View Work(n, nb, work);
140 //
141 //      Use blocked code
142 //
143         for (IndexType i=ki+1; i>=1; i-=nb) {
144             const IndexType ib = min(nb, k-i+1);
145             if (i+ib<=n) {
146 //
147 //              Form the triangular factor of the block reflector
148 //              H = H(i) H(i+1) . . . H(i+ib-1)
149 //
150                 auto Tr = Work(_(1,ib),_(1,ib)).upper();
151                 larft(Forward, ColumnWise,
152                       m-i+1,
153                       A(_(i,m),_(i,i+ib-1)),
154                       tau(_(i,i+ib-1)),
155                       Tr);
156 //
157 //              Apply H to A(i:m,i+ib:n) from the left
158 //
159                 larfb(Left, NoTrans, Forward, ColumnWise,
160                       A(_(i,m),_(i,i+ib-1)),
161                       Tr,
162                       A(_(i,m),_(i+ib,n)),
163                       Work(_(ib+1,n),_(1,ib)));
164             }
165 //
166 //          Apply H to rows i:m of current block
167 //
168             org2r(ib, A(_(i,m),_(i,i+ib-1)), tau(_(i,i+ib-1)), work(_(1,ib)));
169 //
170 //          Set rows 1:i-1 of current block to zero
171 //
172             A(_(1,i-1),_(i,i+ib-1)) = Zero;
173         }
174     }
175     work(1) = iws;
176 }
177 
178 //== interface for native lapack ===============================================
179 
180 #ifdef CHECK_CXXLAPACK
181 
182 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
183 void
184 orgqr_native(IndexType                  k,
185               GeMatrix<MA>              &A,
186               const DenseVector<VTAU>   &tau,
187               DenseVector<VWORK>        &work)
188 {
189     typedef typename  GeMatrix<MA>::ElementType     T;
190 
191     const INTEGER M     = A.numRows();
192     const INTEGER N     = A.numCols();
193     const INTEGER K     = k;
194     const INTEGER LDA   = A.leadingDimension();
195     const INTEGER LWORK = work.length();
196     INTEGER INFO;
197 
198     if (IsSame<T,DOUBLE>::value) {
199         LAPACK_IMPL(dorgqr)(&M,
200                             &N,
201                             &K,
202                             A.data(),
203                             &LDA,
204                             tau.data(),
205                             work.data(),
206                             &LWORK,
207                             &INFO);
208     } else {
209         ASSERT(0);
210     }
211     ASSERT(INFO==0);
212 }
213 
214 #endif // CHECK_CXXLAPACK
215 
216 //== public interface ==========================================================
217 
218 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
219 void
220 orgqr(IndexType                 k,
221       GeMatrix<MA>              &A,
222       const DenseVector<VTAU>   &tau,
223       DenseVector<VWORK>        &work)
224 {
225     typedef typename GeMatrix<MA>::ElementType  ElementType;
226 //
227 //  Test the input parameters
228 //
229 #   ifndef NDEBUG
230     ASSERT(A.firstRow()==IndexType(1));
231     ASSERT(A.firstCol()==IndexType(1));
232     ASSERT(tau.firstIndex()==IndexType(1));
233     ASSERT(tau.length()==k);
234     ASSERT((work.length()==0) || (work.length()>=A.numCols()));
235 
236     const IndexType m = A.numRows();
237     const IndexType n = A.numCols();
238 
239     ASSERT(n<=m);
240     ASSERT(k<=n);
241     ASSERT(0<=k);
242 #   endif 
243 
244 //
245 //  Make copies of output arguments
246 //
247 #   ifdef CHECK_CXXLAPACK
248     typename GeMatrix<MA>::NoView       A_org      = A;
249     typename DenseVector<VWORK>::NoView work_org   = work;
250 #   endif
251 
252 //
253 //  Call implementation
254 //
255     orgqr_generic(k, A, tau, work);
256 
257 #   ifdef CHECK_CXXLAPACK
258 //
259 //  Restore output arguments
260 //
261     typename GeMatrix<MA>::NoView       A_generic      = A;
262     typename DenseVector<VWORK>::NoView work_generic   = work;
263 
264     A = A_org;
265 
266     if (work_org.length()!=0) {
267         work = work_org;
268     } else {
269         work = ElementType(0);
270     }
271 //
272 //  Compare results
273 //
274     orgqr_native(k, A, tau, work);
275 
276     bool failed = false;
277     if (! isIdentical(A_generic, A, "A_generic""A")) {
278         std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
279         std::cerr << "F77LAPACK: A = " << A << std::endl;
280         failed = true;
281     }
282 
283     if (! isIdentical(work_generic, work, "work_generic""work")) {
284         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
285         std::cerr << "F77LAPACK: work = " << work << std::endl;
286         failed = true;
287     }
288 
289     if (failed) {
290         std::cerr << "error in: orgqr.tcc" << std::endl;
291         ASSERT(0);
292     } else {
293 //        std::cerr << "passed: orgqr.tcc" << std::endl;
294     }
295 #   endif
296 }
297 
298 //-- forwarding ----------------------------------------------------------------
299 template <typename IndexType, typename MA, typename VTAU, typename VWORK>
300 void
301 orgqr(IndexType     k,
302       MA            &&A,
303       const VTAU    &tau,
304       VWORK         &&work)
305 {
306     orgqr(k, A, tau, work);
307 }
308 
309 } } // namespace lapack, flens
310 
311 #endif // FLENS_LAPACK_QR_ORGQR_TCC