1 /*
  2  *   Copyright (c) 2012, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DGELQF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
 36  *
 37  *  -- LAPACK routine (version 3.3.1) --
 38  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 39  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 40  *  -- April 2011                                                      --
 41  */
 42 
 43 #ifndef FLENS_LAPACK_LQ_LQF_TCC
 44 #define FLENS_LAPACK_LQ_LQF_TCC 1
 45 
 46 #include <flens/blas/blas.h>
 47 #include <flens/lapack/lapack.h>
 48 
 49 namespace flens { namespace lapack {
 50 
 51 //== generic lapack implementation =============================================
 52 template <typename MA, typename VTAU, typename VWORK>
 53 void
 54 lqf_generic(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
 55 {
 56     using std::max;
 57     using std::min;
 58 
 59     typedef typename GeMatrix<MA>::ElementType  T;
 60     typedef typename GeMatrix<MA>::IndexType    IndexType;
 61 
 62     const Underscore<IndexType> _;
 63 
 64     const IndexType m = A.numRows();
 65     const IndexType n = A.numCols();
 66     const IndexType k = min(m, n);
 67 
 68 //
 69 //  Perform and apply workspace query
 70 //
 71     IndexType nb = ilaenv<T>(1"GELQF""", m, n);
 72 
 73     const IndexType lWorkOpt = m*nb;
 74     if (work.length()==0) {
 75         work.resize(max(lWorkOpt,IndexType(1)));
 76         work(1)=lWorkOpt;
 77     }
 78 
 79 //
 80 //  Quick return if possible
 81 //
 82     if (k==0) {
 83         work(1) = 1;
 84         return;
 85     }
 86 
 87     IndexType nbMin = 2;
 88     IndexType nx = 0;
 89     IndexType iws = m;
 90     IndexType ldWork = -1;
 91 
 92     if ((nb>1) && (nb<k)) {
 93 //
 94 //      Determine when to cross over from blocked to unblocked code.
 95 //
 96         nx = max(IndexType(0), IndexType(ilaenv<T>(3"GELQF""", m, n)));
 97         if (nx<k) {
 98 //
 99 //          Determine if workspace is large enough for blocked code.
100 //
101             ldWork = m;
102             iws = ldWork*nb;
103             if (work.length()<iws) {
104 //
105 //              Not enough workspace to use optimal NB:  reduce NB and
106 //              determine the minimum value of NB.
107 //
108                 nb = work.length() / ldWork;
109                 nbMin = max(IndexType(2),
110                             IndexType(ilaenv<T>(2"GELQF"0, m, n)));
111             }
112         }
113     }
114 
115     IndexType i;
116     if (nb>=nbMin && nb<k && nx<k) {
117         typename GeMatrix<MA>::View Work(m, nb, work);
118 //
119 //      Use blocked code initially
120 //
121         for (i=1; i<=k-nx; i+=nb) {
122             const IndexType ib = min(k-i+1, nb);
123 //
124 //          Compute the LQ factorization of the current block
125 //          A(i:i+ib-1,i:n)
126 //
127             lq2(A(_(i,i+ib-1),_(i,n)), tau(_(i,i+ib-1)), work);
128             if (i+ib<=m) {
129 //
130 //              Form the triangular factor of the block reflector
131 //              H = H(i) H(i+1) . . . H(i+ib-1)
132 //
133                 auto Tr = Work(_(1,ib),_(1,ib)).upper();
134                 larft(Forward, RowWise,
135                       n-i+1,
136                       A(_(i,i+ib-1),_(i,n)),
137                       tau(_(i,i+ib-1)),
138                       Tr);
139 //
140 //              Apply H' to A(i:m,i+ib:n) from the left
141 //
142                 larfb(Right, NoTrans, Forward, RowWise,
143                       A(_(i,i+ib-1),_(i,n)),
144                       Tr,
145                       A(_(i+ib,m),_(i,n)),
146                       Work(_(ib+1,m),_(1,ib)));
147             }
148         }
149     } else {
150         i = 1;
151     }
152 
153 //
154 //  Use unblocked code to factor the last or only block.
155 //
156     if (i<=k) {
157         lq2(A(_(i,m),_(i,n)), tau(_(i,k)), work(_(1,m-i+1)));
158     }
159     work(1) = iws;
160 }
161 
162 //== interface for native lapack ===============================================
163 
164 #ifdef CHECK_CXXLAPACK
165 
166 template <typename MA, typename VTAU, typename VWORK>
167 void
168 lqf_native(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
169 {
170     typedef typename GeMatrix<MA>::ElementType  T;
171 
172     const INTEGER    M = A.numRows();
173     const INTEGER    N = A.numCols();
174     const INTEGER    LDA = A.leadingDimension();
175     INTEGER          LWORK = work.length();
176     INTEGER          INFO;
177 
178     if (IsSame<T, DOUBLE>::value) {
179         LAPACK_IMPL(dgelqf)(&M, &N, A.data(), &LDA,
180                             tau.data(),
181                             work.data(), &LWORK,
182                             &INFO);
183         assert(INFO==0);
184     } else {
185         ASSERT(0);
186     }
187 }
188 
189 #endif // CHECK_CXXLAPACK
190 
191 //== public interface ==========================================================
192 
193 template <typename MA, typename VTAU, typename VWORK>
194 void
195 lqf(GeMatrix<MA> &A, DenseVector<VTAU> &tau, DenseVector<VWORK> &work)
196 {
197     using std::min;
198     typedef typename GeMatrix<MA>::ElementType  ElementType;
199     typedef typename GeMatrix<MA>::IndexType    IndexType;
200 //
201 //  Test the input parameters
202 //
203 #   ifndef NDEBUG
204     ASSERT(A.firstRow()==1);
205     ASSERT(A.firstCol()==1);
206     ASSERT(tau.firstIndex()==1);
207     ASSERT(work.firstIndex()==1);
208 
209     const IndexType m = A.numRows();
210     const IndexType n = A.numCols();
211     const IndexType k = min(m,n);
212 
213     ASSERT(tau.length()==k);
214     ASSERT(work.length()>=m || work.length()==IndexType(0));
215 #   endif
216 
217 #   ifdef CHECK_CXXLAPACK
218 //
219 //  Make copies of output arguments
220 //
221     typename GeMatrix<MA>::NoView       A_org    = A;
222     typename DenseVector<VTAU>::NoView  tau_org  = tau;
223     typename DenseVector<VTAU>::NoView  work_org = work;
224 #   endif
225 
226 //
227 //  Call implementation
228 //
229     lqf_generic(A, tau, work);
230 
231 #   ifdef CHECK_CXXLAPACK
232 //
233 //  Restore output parameters
234 //
235     typename GeMatrix<MA>::NoView       A_generic    = A;
236     typename DenseVector<VTAU>::NoView  tau_generic  = tau;
237     typename DenseVector<VTAU>::NoView  work_generic = work;
238 
239     A = A_org;
240     tau = tau_org;
241 
242     // if the generic implementation resized work due to a work size query
243     // we must not restore the work array
244     if (work_org.length()>0) {
245         work = work_org;
246     } else {
247         work = 0;
248     }
249 //
250 //  Compare results
251 //
252     lqf_native(A, tau, work);
253 
254     bool failed = false;
255     if (! isIdentical(A_generic, A, "A_generic""A")) {
256         std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
257         std::cerr << "F77LAPACK: A = " << A << std::endl;
258         failed = true;
259     }
260 
261     if (! isIdentical(tau_generic, tau, "tau_generic""tau")) {
262         std::cerr << "CXXLAPACK: tau_generic = " << tau_generic << std::endl;
263         std::cerr << "F77LAPACK: tau = " << tau << std::endl;
264         failed = true;
265     }
266 
267     if (! isIdentical(work_generic, work, "work_generic""work")) {
268         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
269         std::cerr << "F77LAPACK: work = " << work << std::endl;
270         failed = true;
271     }
272 
273     if (failed) {
274         ASSERT(0);
275     }
276 #   endif
277 }
278 
279 //-- forwarding ----------------------------------------------------------------
280 template <typename MA, typename VTAU, typename VWORK>
281 void
282 lqf(MA &&A, VTAU &&tau, VWORK &&work)
283 {
284     CHECKPOINT_ENTER;
285     lqf(A, tau, work);
286     CHECKPOINT_LEAVE;
287 }
288 
289 } } // namespace lapack, flens
290 
291 #endif // FLENS_LAPACK_LQ_LQF_TCC