1 /*
  2  *   Copyright (c) 2012, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 /* Based on
 34  *
 35        SUBROUTINE DLAQP2( M, N, OFFSET, A, LDA, JPVT, TAU, VN1, VN2,
 36       $                   WORK )
 37  *
 38  *  -- LAPACK auxiliary routine (version 3.3.1) --
 39  *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
 40  *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
 41  *  -- April 2011                                                      --
 42  */
 43 
 44 #ifndef FLENS_LAPACK_QR_LAQP2_TCC
 45 #define FLENS_LAPACK_QR_LAQP2_TCC 1
 46 
 47 #include <flens/blas/blas.h>
 48 #include <flens/lapack/lapack.h>
 49 
 50 namespace flens { namespace lapack {
 51 
 52 //== generic lapack implementation =============================================
 53 
 54 template <typename MA, typename JPIV, typename VTAU,
 55           typename VN1, typename VN2, typename VWORK>
 56 void
 57 laqp2_generic(typename GeMatrix<MA>::IndexType  offset,
 58               GeMatrix<MA>                      &A,
 59               DenseVector<JPIV>                 &jPiv,
 60               DenseVector<VTAU>                 &tau,
 61               DenseVector<VN1>                  &vn1,
 62               DenseVector<VN2>                  &vn2,
 63               DenseVector<VWORK>                &work)
 64 {
 65     using std::abs;
 66     using std::max;
 67     using std::min;
 68     using std::sqrt;
 69     using std::swap;
 70 
 71     typedef typename GeMatrix<MA>::ElementType  ElementType;
 72     typedef typename GeMatrix<MA>::IndexType    IndexType;
 73 
 74     const Underscore<IndexType> _;
 75 
 76     const IndexType m  = A.numRows();
 77     const IndexType n  = A.numCols();
 78 
 79     const IndexType mn = min(m-offset, n);
 80 
 81     const ElementType Zero(0), One(1);
 82     const ElementType tol3z = sqrt(lamch<ElementType>(Eps));
 83 //
 84 //  Compute factorization.
 85 //
 86     for (IndexType i=1; i<=mn; ++i) {
 87 
 88         IndexType offpi = offset + i;
 89 //
 90 //      Determine ith pivot column and swap if necessary.
 91 //
 92         IndexType pvt = (i-1) + blas::iamax(vn1(_(i,n)));
 93 
 94         if (pvt!=i) {
 95             blas::swap(A(_,pvt), A(_,i));
 96             swap(jPiv(pvt), jPiv(i));
 97             vn1(pvt) = vn1(i);
 98             vn2(pvt) = vn2(i);
 99         }
100 //
101 //      Generate elementary reflector H(i).
102 //
103         if (offpi<m) {
104             larfg(m-offpi+1, A(offpi,i), A(_(offpi+1,m),i), tau(i));
105         } else {
106             larfg(1, A(m,i), A(_(m+1,m),i), tau(i));
107         }
108 
109         if (i<=n) {
110 //
111 //          Apply H(i)**T to A(offset+i:m,i+1:n) from the left.
112 //
113             const ElementType Aii = A(offpi,i);
114             A(offpi,i) = One;
115             larf(Left, A(_(offpi,m),i), tau(i), A(_(offpi,m),_(i+1,n)),
116                  work(_(1,n)));
117             A(offpi,i) = Aii;
118         }
119 //
120 //      Update partial column norms.
121 //
122         for (IndexType j=i+1; j<=n; ++j) {
123             if (vn1(j)!=Zero) {
124 //
125 //              NOTE: The following 4 lines follow from the analysis in
126 //              Lapack Working Note 176.
127 //
128                 ElementType tmp = One - pow(abs(A(offpi,j))/vn1(j),2);
129                 tmp = max(tmp, Zero);
130                 ElementType tmp2 = tmp*pow(vn1(j)/vn2(j), 2);
131                 if (tmp2<=tol3z) {
132                     if (offpi<m) {
133                         vn1(j) = blas::nrm2(A(_(offpi+1,m),j));
134                         vn2(j) = vn1(j);
135                     } else {
136                         vn1(j) = Zero;
137                         vn2(j) = Zero;
138                     }
139                 } else {
140                     vn1(j) *= sqrt(tmp);
141                 }
142             }
143         }
144 
145     }
146 }
147 
148 //== interface for native lapack ===============================================
149 
150 #ifdef CHECK_CXXLAPACK
151 
152 template <typename MA, typename JPIV, typename VTAU,
153           typename VN1, typename VN2, typename VWORK>
154 void
155 laqp2_native(typename GeMatrix<MA>::IndexType  offset,
156              GeMatrix<MA>                      &A,
157              DenseVector<JPIV>                 &jPiv,
158              DenseVector<VTAU>                 &tau,
159              DenseVector<VN1>                  &vn1,
160              DenseVector<VN2>                  &vn2,
161              DenseVector<VWORK>                &work)
162 {
163     typedef typename GeMatrix<MA>::ElementType  T;
164 
165     const INTEGER    M      = A.numRows();
166     const INTEGER    N      = A.numCols();
167     const INTEGER    OFFSET = offset;
168     const INTEGER    LDA    = A.leadingDimension();
169 
170     if (IsSame<T, DOUBLE>::value) {
171         LAPACK_DECL(dlaqp2)(&M,
172                             &N,
173                             &OFFSET,
174                             A.data(),
175                             &LDA,
176                             jPiv.data(),
177                             tau.data(),
178                             vn1.data(),
179                             vn2.data(),
180                             work.data());
181     } else {
182         ASSERT(0);
183     }
184 }
185 
186 #endif // CHECK_CXXLAPACK
187 
188 //== public interface ==========================================================
189 
190 template <typename MA, typename JPIV, typename VTAU,
191           typename VN1, typename VN2, typename VWORK>
192 void
193 laqp2(typename GeMatrix<MA>::IndexType  offset,
194       GeMatrix<MA>                      &A,
195       DenseVector<JPIV>                 &jPiv,
196       DenseVector<VTAU>                 &tau,
197       DenseVector<VN1>                  &vn1,
198       DenseVector<VN2>                  &vn2,
199       DenseVector<VWORK>                &work)
200 {
201     std::cerr << "enter: laqp2" << std::endl;
202     using std::min;
203     typedef typename GeMatrix<MA>::ElementType  ElementType;
204     typedef typename GeMatrix<MA>::IndexType    IndexType;
205 
206 #   ifndef NDEBUG
207 //
208 //  Test the input parameters
209 //
210     ASSERT(A.firstRow()==1);
211     ASSERT(A.firstCol()==1);
212     ASSERT(jPiv.firstIndex()==1);
213     ASSERT(tau.firstIndex()==1);
214     ASSERT(vn1.firstIndex()==1);
215     ASSERT(vn2.firstIndex()==1);
216     ASSERT(work.firstIndex()==1);
217 
218     const IndexType m = A.numRows();
219     const IndexType n = A.numCols();
220 
221     // Lehn: I think there is a bug in the DLAQP2 documentation.  This should
222     //       be the length of tau.
223     const IndexType k = min(m-offset, n);
224 
225     ASSERT(jPiv.length()==n);
226     ASSERT(tau.length()==k);
227     ASSERT(vn1.length()==n);
228     ASSERT(vn1.length()==n);
229     ASSERT(work.length()==n);
230 #   endif
231 
232 #   ifdef CHECK_CXXLAPACK
233 //
234 //  Make copies of output arguments
235 //
236     typename GeMatrix<MA>::NoView       A_org      = A;
237     typename DenseVector<JPIV>::NoView  jPiv_org   = jPiv;
238     typename DenseVector<VTAU>::NoView  tau_org    = tau;
239     typename DenseVector<VN1>::NoView   vn1_org    = vn1;
240     typename DenseVector<VN2>::NoView   vn2_org    = vn2;
241     typename DenseVector<VWORK>::NoView work_org   = work;
242 #   endif
243 
244 //
245 //  Call implementation
246 //
247     laqp2_generic(offset, A, jPiv, tau, vn1, vn2, work);
248 
249 #   ifdef CHECK_CXXLAPACK
250 //
251 //  Restore output arguments
252 //
253     typename GeMatrix<MA>::NoView       A_generic    = A;
254     typename DenseVector<JPIV>::NoView  jPiv_generic = jPiv;
255     typename DenseVector<VTAU>::NoView  tau_generic  = tau;
256     typename DenseVector<VN1>::NoView   vn1_generic  = vn1;
257     typename DenseVector<VN2>::NoView   vn2_generic  = vn2;
258     typename DenseVector<VWORK>::NoView work_generic = work;
259 
260     A    = A_org;
261     jPiv = jPiv_org;
262     tau  = tau_org;
263     vn1  = vn1_org;
264     vn2  = vn2_org;
265     work = work_org;
266 
267 //
268 //  Compare results
269 //
270     laqp2_native(offset, A, jPiv, tau, vn1, vn2, work);
271 
272     bool failed = false;
273     if (! isIdentical(A_generic, A, "A_generic""A")) {
274         std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
275         std::cerr << "F77LAPACK: A = " << A << std::endl;
276         failed = true;
277     }
278 
279     if (! isIdentical(jPiv_generic, jPiv, "jPiv_generic""jPiv")) {
280         std::cerr << "CXXLAPACK: jPiv_generic = " << jPiv_generic << std::endl;
281         std::cerr << "F77LAPACK: jPiv = " << jPiv << std::endl;
282         failed = true;
283     }
284 
285     if (! isIdentical(tau_generic, tau, "tau_generic""tau")) {
286         std::cerr << "CXXLAPACK: tau_generic = " << tau_generic << std::endl;
287         std::cerr << "F77LAPACK: tau = " << tau << std::endl;
288         failed = true;
289     }
290 
291     if (! isIdentical(vn1_generic, vn1, "vn1_generic""vn1")) {
292         std::cerr << "CXXLAPACK: vn1_generic = " << vn1_generic << std::endl;
293         std::cerr << "F77LAPACK: vn1 = " << vn1 << std::endl;
294         failed = true;
295     }
296 
297     if (! isIdentical(vn2_generic, vn2, "vn2_generic""vn2")) {
298         std::cerr << "CXXLAPACK: vn2_generic = " << vn2_generic << std::endl;
299         std::cerr << "F77LAPACK: vn2 = " << vn2 << std::endl;
300         failed = true;
301     }
302 
303     if (! isIdentical(work_generic, work, "work_generic""work")) {
304         std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
305         std::cerr << "F77LAPACK: work = " << work << std::endl;
306         failed = true;
307     }
308 
309     if (failed) {
310         ASSERT(0);
311     }
312 #   endif
313 
314     std::cerr << "leave: laqp2" << std::endl;
315 }
316 
317 //-- forwarding ----------------------------------------------------------------
318 template <typename MA, typename JPIV, typename VTAU,
319           typename VN1, typename VN2, typename VWORK>
320 void
321 laqp2(typename MA::IndexType  offset,
322       MA                      &&A,
323       JPIV                    &&jPiv,
324       VTAU                    &&tau,
325       VN1                     &&vn1,
326       VN2                     &&vn2,
327       VWORK                   &&work)
328 {
329     CHECKPOINT_ENTER;
330     laqp2(offset, A, jPiv, tau, vn1, vn2, work);
331     CHECKPOINT_LEAVE;
332 }
333 
334 } } // namespace lapack, flens
335 
336 #endif // FLENS_LAPACK_QR_LAQP2_TCC