1 /*
  2  *   Copyright (c) 2012, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1,
 36       $                   VN2, AUXV, F, LDF )
 37  *
 38  *  -- LAPACK auxiliary routine (version 3.3.1) --
 39  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 40  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 41  *  -- April 2011                                                      --
 42  */
 43 
 44 #ifndef FLENS_LAPACK_QR_LAQPS_TCC
 45 #define FLENS_LAPACK_QR_LAQPS_TCC 1
 46 
 47 #include <flens/blas/blas.h>
 48 #include <flens/lapack/lapack.h>
 49 
 50 namespace flens { namespace lapack {
 51 
 52 //== generic lapack implementation =============================================
 53 
 54 template <typename MA, typename JPIV, typename VTAU,
 55           typename VN1, typename VN2, typename VAUX,
 56           typename MF>
 57 void
 58 laqps_generic(typename GeMatrix<MA>::IndexType  offset,
 59               typename GeMatrix<MA>::IndexType  nb,
 60               typename GeMatrix<MA>::IndexType  &kb,
 61               GeMatrix<MA>                      &A,
 62               DenseVector<JPIV>                 &jPiv,
 63               DenseVector<VTAU>                 &tau,
 64               DenseVector<VN1>                  &vn1,
 65               DenseVector<VN2>                  &vn2,
 66               DenseVector<VAUX>                 &aux,
 67               GeMatrix<MF>                      &F)
 68 {
 69     using std::abs;
 70     using std::max;
 71     using std::min;
 72     using std::sqrt;
 73     using std::swap;
 74 
 75     typedef typename GeMatrix<MA>::ElementType  ElementType;
 76     typedef typename GeMatrix<MA>::IndexType    IndexType;
 77 
 78     const Underscore<IndexType> _;
 79 
 80     const IndexType m     = A.numRows();
 81     const IndexType n     = A.numCols();
 82 
 83     const IndexType lastRk = min(m, n+offset);
 84 
 85     IndexType lasticc = 0;
 86     IndexType k = 0;
 87 
 88     const ElementType Zero(0), One(1);
 89     const ElementType tol3z = sqrt(lamch<ElementType>(Eps));
 90 //
 91 //  Beginning of while loop.
 92 //
 93     while (k<nb && lasticc==0) {
 94         ++k;
 95 
 96         const IndexType rk = offset + k;
 97 //
 98 //      Determine ith pivot column and swap if necessary
 99 //
100         IndexType pvt = (k-1) + blas::iamax(vn1(_(k,n)));
101         if (pvt!=k) {
102             blas::swap(A(_,pvt), A(_,k));
103             blas::swap(F(pvt,_(1,k-1)), F(k,_(1,k-1)));
104             swap(jPiv(pvt),jPiv(k));
105             vn1(pvt) = vn1(k);
106             vn2(pvt) = vn2(k);
107         }
108 //
109 //      Apply previous Householder reflectors to column K:
110 //      A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)**T.
111 //
112         if (k>1) {
113             blas::mv(NoTrans, -One, A(_(rk,m),_(1,k-1)), F(k,_(1,k-1)),
114                      One, A(_(rk,m),k));
115         }
116 //
117 //      Generate elementary reflector H(k).
118 //
119         if (rk<m) {
120             larfg(m-rk+1, A(rk,k), A(_(rk+1,m),k), tau(k));
121         } else {
122             larfg(1, A(m,k), A(_(m+1,m),k), tau(k));
123         }
124 
125         const ElementType Akk = A(rk,k);
126         A(rk,k) = One;
127 //
128 //      Compute Kth column of F:
129 //
130 //      Compute  F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)**T*A(RK:M,K).
131 //
132         if (k<n) {
133             blas::mv(Trans, tau(k), A(_(rk,m),_(k+1,n)), A(_(rk,m),k),
134                      Zero, F(_(k+1,n),k));
135         }
136 //
137 //      Padding F(1:K,K) with zeros.
138 //
139         F(_(1,k),k) = Zero;
140 //
141 //      Incremental updating of F:
142 //      F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)**T
143 //                   *A(RK:M,K).
144 //
145         if (k>1) {
146             blas::mv(Trans, -tau(k), A(_(rk,m),_(1,k-1)), A(_(rk,m),k),
147                      Zero, aux(_(1,k-1)));
148             blas::mv(NoTrans, One, F(_,_(1,k-1)), aux(_(1,k-1)),
149                      One, F(_,k));
150         }
151 //
152 //      Update the current row of A:
153 //      A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)**T.
154 //
155         if (k<n) {
156             blas::mv(NoTrans, -One, F(_(k+1,n),_(1,k)), A(rk,_(1,k)),
157                      One, A(rk,_(k+1,n)));
158         }
159 //
160 //      Update partial column norms.
161 //
162         if (rk<lastRk) {
163             for (IndexType j=k+1; j<=n; ++j) {
164                 if (vn1(j)!=Zero) {
165 //
166 //                  NOTE: The following 4 lines follow from the analysis in
167 //                  Lapack Working Note 176.
168 //
169                     ElementType tmp = abs(A(rk,j)) / vn1(j);
170                     tmp = max(Zero, (One+tmp)*(One-tmp));
171                     ElementType tmp2 = tmp*pow(vn1(j)/vn2(j),2);
172                     if (tmp2<=tol3z) {
173                         vn2(j) = ElementType(lasticc);
174                         lasticc = j;
175                     } else {
176                         vn1(j) *= sqrt(tmp);
177                     }
178                 }
179             }
180         }
181 
182         A(rk,k) = Akk;
183 //
184 //      End of while loop.
185 //
186     }
187     kb = k;
188     const IndexType rk = offset + kb;
189 //
190 //  Apply the block reflector to the rest of the matrix:
191 //  A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) -
192 //                        A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)**T.
193 //
194     if (kb<min(n,m-offset)) {
195         blas::mm(NoTrans, Trans,
196                  -One, A(_(rk+1,m),_(1,kb)), F(_(kb+1,n),_(1,kb)),
197                  One, A(_(rk+1,m),_(kb+1,n)));
198     }
199 //
200 //  Recomputation of difficult columns.
201 //
202     while (lasticc>0) {
203         IndexType iTmp = nint(vn2(lasticc));
204         vn1(lasticc) = blas::nrm2(A(_(rk+1,m),lasticc));
205 //
206 //      NOTE: The computation of VN1( LSTICC ) relies on the fact that 
207 //      SNRM2 does not fail on vectors with norm below the value of
208 //      SQRT(DLAMCH('S')) 
209 //
210         vn2(lasticc) = vn1(lasticc);
211         lasticc = iTmp;
212     }
213 }
214 
215 //== interface for native lapack ===============================================
216 
217 #ifdef CHECK_CXXLAPACK
218 
219 template <typename MA, typename JPIV, typename VTAU,
220           typename VN1, typename VN2, typename VAUX,
221           typename MF>
222 void
223 laqps_native(typename GeMatrix<MA>::IndexType  offset,
224              typename GeMatrix<MA>::IndexType  nb,
225              typename GeMatrix<MA>::IndexType  &kb,
226              GeMatrix<MA>                      &A,
227              DenseVector<JPIV>                 &jPiv,
228              DenseVector<VTAU>                 &tau,
229              DenseVector<VN1>                  &vn1,
230              DenseVector<VN2>                  &vn2,
231              DenseVector<VAUX>                 &aux,
232              GeMatrix<MF>                      &F)
233 {
234     typedef typename GeMatrix<MA>::ElementType  T;
235 
236     const INTEGER    M      = A.numRows();
237     const INTEGER    N      = A.numCols();
238     const INTEGER    OFFSET = offset;
239     const INTEGER    NB     = nb;
240     INTEGER          KB     = kb;
241     const INTEGER    LDA    = A.leadingDimension();
242     const INTEGER    LDF    = F.leadingDimension();
243 
244     if (IsSame<T, DOUBLE>::value) {
245         LAPACK_DECL(dlaqps)(&M,
246                             &N,
247                             &OFFSET,
248                             &NB,
249                             &KB,
250                             A.data(),
251                             &LDA,
252                             jPiv.data(),
253                             tau.data(),
254                             vn1.data(),
255                             vn2.data(),
256                             aux.data(),
257                             F.data(),
258                             &LDF);
259         kb = KB;
260     } else {
261         ASSERT(0);
262     }
263 }
264 
265 #endif // CHECK_CXXLAPACK
266 
267 //== public interface ==========================================================
268 template <typename MA, typename JPIV, typename VTAU,
269           typename VN1, typename VN2, typename VAUX,
270           typename MF>
271 void
272 laqps(typename GeMatrix<MA>::IndexType  offset,
273       typename GeMatrix<MA>::IndexType  nb,
274       typename GeMatrix<MA>::IndexType  &kb,
275       GeMatrix<MA>                      &A,
276       DenseVector<JPIV>                 &jPiv,
277       DenseVector<VTAU>                 &tau,
278       DenseVector<VN1>                  &vn1,
279       DenseVector<VN2>                  &vn2,
280       DenseVector<VAUX>                 &aux,
281       GeMatrix<MF>                      &F)
282 {
283     std::cerr << "enter: laqps" << std::endl;
284     using std::min;
285     typedef typename GeMatrix<MA>::ElementType  ElementType;
286     typedef typename GeMatrix<MA>::IndexType    IndexType;
287 
288 #   ifndef NDEBUG
289 //
290 //  Test the input parameters
291 //
292     ASSERT(A.firstRow()==1);
293     ASSERT(A.firstCol()==1);
294     ASSERT(jPiv.firstIndex()==1);
295     ASSERT(tau.firstIndex()==1);
296     ASSERT(vn1.firstIndex()==1);
297     ASSERT(vn2.firstIndex()==1);
298     ASSERT(aux.firstIndex()==1);
299     ASSERT(F.firstRow()==1);
300     ASSERT(F.firstCol()==1);
301 
302     const IndexType n = A.numCols();
303 
304     ASSERT(jPiv.length()==n);
305     std::cerr << "tau.length() = " << tau.length() << std::endl;
306     std::cerr << "nb = " << nb << std::endl;
307     std::cerr << "kb = " << kb << std::endl;
308     ASSERT(tau.length()>=nb);
309     ASSERT(vn1.length()==n);
310     ASSERT(vn1.length()==n);
311     ASSERT(aux.length()==nb);
312     ASSERT(F.numRows()==n);
313     ASSERT(F.numCols()==nb);
314 #   endif
315 
316 #   ifdef CHECK_CXXLAPACK
317 //
318 //  Make copies of output arguments
319 //
320     typename GeMatrix<MA>::NoView       A_org      = A;
321     typename DenseVector<JPIV>::NoView  jPiv_org   = jPiv;
322     typename DenseVector<VTAU>::NoView  tau_org    = tau;
323     typename DenseVector<VN1>::NoView   vn1_org    = vn1;
324     typename DenseVector<VN2>::NoView   vn2_org    = vn2;
325     typename DenseVector<VAUX>::NoView  aux_org    = aux;
326     typename GeMatrix<MF>::NoView       F_org      = F;
327 #   endif
328 
329 //
330 //  Call implementation
331 //
332     laqps_generic(offset, nb, kb, A, jPiv, tau, vn1, vn2, aux, F);
333 
334 #   ifdef CHECK_CXXLAPACK
335 //
336 //  Restore output arguments
337 //
338     typename GeMatrix<MA>::NoView       A_generic    = A;
339     typename DenseVector<JPIV>::NoView  jPiv_generic = jPiv;
340     typename DenseVector<VTAU>::NoView  tau_generic  = tau;
341     typename DenseVector<VN1>::NoView   vn1_generic  = vn1;
342     typename DenseVector<VN2>::NoView   vn2_generic  = vn2;
343     typename DenseVector<VAUX>::NoView  aux_generic  = aux;
344     typename GeMatrix<MF>::NoView       F_generic    = F;
345 
346     A    = A_org;
347     jPiv = jPiv_org;
348     tau  = tau_org;
349     vn1  = vn1_org;
350     vn2  = vn2_org;
351     aux  = aux_org;
352     F    = F_org;
353 
354 //
355 //  Compare results
356 //
357     laqps_native(offset, nb, kb, A, jPiv, tau, vn1, vn2, aux, F);
358 
359     bool failed = false;
360     if (! isIdentical(A_generic, A, "A_generic""A")) {
361         std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
362         std::cerr << "F77LAPACK: A = " << A << std::endl;
363         failed = true;
364     }
365 
366     if (! isIdentical(jPiv_generic, jPiv, "jPiv_generic""jPiv")) {
367         std::cerr << "CXXLAPACK: jPiv_generic = " << jPiv_generic << std::endl;
368         std::cerr << "F77LAPACK: jPiv = " << jPiv << std::endl;
369         failed = true;
370     }
371 
372     if (! isIdentical(tau_generic, tau, "tau_generic""tau")) {
373         std::cerr << "CXXLAPACK: tau_generic = " << tau_generic << std::endl;
374         std::cerr << "F77LAPACK: tau = " << tau << std::endl;
375         failed = true;
376     }
377 
378     if (! isIdentical(vn1_generic, vn1, "vn1_generic""vn1")) {
379         std::cerr << "CXXLAPACK: vn1_generic = " << vn1_generic << std::endl;
380         std::cerr << "F77LAPACK: vn1 = " << vn1 << std::endl;
381         failed = true;
382     }
383 
384     if (! isIdentical(vn2_generic, vn2, "vn2_generic""vn2")) {
385         std::cerr << "CXXLAPACK: vn2_generic = " << vn2_generic << std::endl;
386         std::cerr << "F77LAPACK: vn2 = " << vn2 << std::endl;
387         failed = true;
388     }
389 
390     if (! isIdentical(aux_generic, aux, "aux_generic""aux")) {
391         std::cerr << "CXXLAPACK: aux_generic = " << aux_generic << std::endl;
392         std::cerr << "F77LAPACK: aux = " << aux << std::endl;
393         failed = true;
394     }
395 
396     if (! isIdentical(F_generic, F, "F_generic""F")) {
397         std::cerr << "CXXLAPACK: F_generic = " << F_generic << std::endl;
398         std::cerr << "F77LAPACK: F = " << F << std::endl;
399         failed = true;
400     }
401 
402     if (failed) {
403         ASSERT(0);
404     }
405 #   endif
406 
407     std::cerr << "leave: laqps" << std::endl;
408 }
409 
410 //-- forwarding ----------------------------------------------------------------
411 template <typename MA, typename JPIV, typename VTAU,
412           typename VN1, typename VN2, typename VAUX,
413           typename MF>
414 void
415 laqps(typename MA::IndexType  offset,
416       typename MA::IndexType  nb,
417       typename MA::IndexType  &kb,
418       MA                      &&A,
419       JPIV                    &&jPiv,
420       VTAU                    &&tau,
421       VN1                     &&vn1,
422       VN2                     &&vn2,
423       VAUX                    &&aux,
424       MF                      &&F)
425 {
426     CHECKPOINT_ENTER;
427     laqps(offset, nb, kb, A, jPiv, tau, vn1, vn2, aux, F);
428     CHECKPOINT_LEAVE;
429 }
430 
431 } } // namespace lapack, flens
432 
433 #endif // FLENS_LAPACK_QR_LAQPS_TCC