1 /*
2 * Copyright (c) 2012, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1,
36 $ VN2, AUXV, F, LDF )
37 *
38 * -- LAPACK auxiliary routine (version 3.3.1) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * -- April 2011 --
42 */
43
44 #ifndef FLENS_LAPACK_QR_LAQPS_TCC
45 #define FLENS_LAPACK_QR_LAQPS_TCC 1
46
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 namespace flens { namespace lapack {
51
52 //== generic lapack implementation =============================================
53
54 template <typename MA, typename JPIV, typename VTAU,
55 typename VN1, typename VN2, typename VAUX,
56 typename MF>
57 void
58 laqps_generic(typename GeMatrix<MA>::IndexType offset,
59 typename GeMatrix<MA>::IndexType nb,
60 typename GeMatrix<MA>::IndexType &kb,
61 GeMatrix<MA> &A,
62 DenseVector<JPIV> &jPiv,
63 DenseVector<VTAU> &tau,
64 DenseVector<VN1> &vn1,
65 DenseVector<VN2> &vn2,
66 DenseVector<VAUX> &aux,
67 GeMatrix<MF> &F)
68 {
69 using std::abs;
70 using std::max;
71 using std::min;
72 using std::sqrt;
73 using std::swap;
74
75 typedef typename GeMatrix<MA>::ElementType ElementType;
76 typedef typename GeMatrix<MA>::IndexType IndexType;
77
78 const Underscore<IndexType> _;
79
80 const IndexType m = A.numRows();
81 const IndexType n = A.numCols();
82
83 const IndexType lastRk = min(m, n+offset);
84
85 IndexType lasticc = 0;
86 IndexType k = 0;
87
88 const ElementType Zero(0), One(1);
89 const ElementType tol3z = sqrt(lamch<ElementType>(Eps));
90 //
91 // Beginning of while loop.
92 //
93 while (k<nb && lasticc==0) {
94 ++k;
95
96 const IndexType rk = offset + k;
97 //
98 // Determine ith pivot column and swap if necessary
99 //
100 IndexType pvt = (k-1) + blas::iamax(vn1(_(k,n)));
101 if (pvt!=k) {
102 blas::swap(A(_,pvt), A(_,k));
103 blas::swap(F(pvt,_(1,k-1)), F(k,_(1,k-1)));
104 swap(jPiv(pvt),jPiv(k));
105 vn1(pvt) = vn1(k);
106 vn2(pvt) = vn2(k);
107 }
108 //
109 // Apply previous Householder reflectors to column K:
110 // A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)**T.
111 //
112 if (k>1) {
113 blas::mv(NoTrans, -One, A(_(rk,m),_(1,k-1)), F(k,_(1,k-1)),
114 One, A(_(rk,m),k));
115 }
116 //
117 // Generate elementary reflector H(k).
118 //
119 if (rk<m) {
120 larfg(m-rk+1, A(rk,k), A(_(rk+1,m),k), tau(k));
121 } else {
122 larfg(1, A(m,k), A(_(m+1,m),k), tau(k));
123 }
124
125 const ElementType Akk = A(rk,k);
126 A(rk,k) = One;
127 //
128 // Compute Kth column of F:
129 //
130 // Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)**T*A(RK:M,K).
131 //
132 if (k<n) {
133 blas::mv(Trans, tau(k), A(_(rk,m),_(k+1,n)), A(_(rk,m),k),
134 Zero, F(_(k+1,n),k));
135 }
136 //
137 // Padding F(1:K,K) with zeros.
138 //
139 F(_(1,k),k) = Zero;
140 //
141 // Incremental updating of F:
142 // F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)**T
143 // *A(RK:M,K).
144 //
145 if (k>1) {
146 blas::mv(Trans, -tau(k), A(_(rk,m),_(1,k-1)), A(_(rk,m),k),
147 Zero, aux(_(1,k-1)));
148 blas::mv(NoTrans, One, F(_,_(1,k-1)), aux(_(1,k-1)),
149 One, F(_,k));
150 }
151 //
152 // Update the current row of A:
153 // A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)**T.
154 //
155 if (k<n) {
156 blas::mv(NoTrans, -One, F(_(k+1,n),_(1,k)), A(rk,_(1,k)),
157 One, A(rk,_(k+1,n)));
158 }
159 //
160 // Update partial column norms.
161 //
162 if (rk<lastRk) {
163 for (IndexType j=k+1; j<=n; ++j) {
164 if (vn1(j)!=Zero) {
165 //
166 // NOTE: The following 4 lines follow from the analysis in
167 // Lapack Working Note 176.
168 //
169 ElementType tmp = abs(A(rk,j)) / vn1(j);
170 tmp = max(Zero, (One+tmp)*(One-tmp));
171 ElementType tmp2 = tmp*pow(vn1(j)/vn2(j),2);
172 if (tmp2<=tol3z) {
173 vn2(j) = ElementType(lasticc);
174 lasticc = j;
175 } else {
176 vn1(j) *= sqrt(tmp);
177 }
178 }
179 }
180 }
181
182 A(rk,k) = Akk;
183 //
184 // End of while loop.
185 //
186 }
187 kb = k;
188 const IndexType rk = offset + kb;
189 //
190 // Apply the block reflector to the rest of the matrix:
191 // A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) -
192 // A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)**T.
193 //
194 if (kb<min(n,m-offset)) {
195 blas::mm(NoTrans, Trans,
196 -One, A(_(rk+1,m),_(1,kb)), F(_(kb+1,n),_(1,kb)),
197 One, A(_(rk+1,m),_(kb+1,n)));
198 }
199 //
200 // Recomputation of difficult columns.
201 //
202 while (lasticc>0) {
203 IndexType iTmp = nint(vn2(lasticc));
204 vn1(lasticc) = blas::nrm2(A(_(rk+1,m),lasticc));
205 //
206 // NOTE: The computation of VN1( LSTICC ) relies on the fact that
207 // SNRM2 does not fail on vectors with norm below the value of
208 // SQRT(DLAMCH('S'))
209 //
210 vn2(lasticc) = vn1(lasticc);
211 lasticc = iTmp;
212 }
213 }
214
215 //== interface for native lapack ===============================================
216
217 #ifdef CHECK_CXXLAPACK
218
219 template <typename MA, typename JPIV, typename VTAU,
220 typename VN1, typename VN2, typename VAUX,
221 typename MF>
222 void
223 laqps_native(typename GeMatrix<MA>::IndexType offset,
224 typename GeMatrix<MA>::IndexType nb,
225 typename GeMatrix<MA>::IndexType &kb,
226 GeMatrix<MA> &A,
227 DenseVector<JPIV> &jPiv,
228 DenseVector<VTAU> &tau,
229 DenseVector<VN1> &vn1,
230 DenseVector<VN2> &vn2,
231 DenseVector<VAUX> &aux,
232 GeMatrix<MF> &F)
233 {
234 typedef typename GeMatrix<MA>::ElementType T;
235
236 const INTEGER M = A.numRows();
237 const INTEGER N = A.numCols();
238 const INTEGER OFFSET = offset;
239 const INTEGER NB = nb;
240 INTEGER KB = kb;
241 const INTEGER LDA = A.leadingDimension();
242 const INTEGER LDF = F.leadingDimension();
243
244 if (IsSame<T, DOUBLE>::value) {
245 LAPACK_DECL(dlaqps)(&M,
246 &N,
247 &OFFSET,
248 &NB,
249 &KB,
250 A.data(),
251 &LDA,
252 jPiv.data(),
253 tau.data(),
254 vn1.data(),
255 vn2.data(),
256 aux.data(),
257 F.data(),
258 &LDF);
259 kb = KB;
260 } else {
261 ASSERT(0);
262 }
263 }
264
265 #endif // CHECK_CXXLAPACK
266
267 //== public interface ==========================================================
268 template <typename MA, typename JPIV, typename VTAU,
269 typename VN1, typename VN2, typename VAUX,
270 typename MF>
271 void
272 laqps(typename GeMatrix<MA>::IndexType offset,
273 typename GeMatrix<MA>::IndexType nb,
274 typename GeMatrix<MA>::IndexType &kb,
275 GeMatrix<MA> &A,
276 DenseVector<JPIV> &jPiv,
277 DenseVector<VTAU> &tau,
278 DenseVector<VN1> &vn1,
279 DenseVector<VN2> &vn2,
280 DenseVector<VAUX> &aux,
281 GeMatrix<MF> &F)
282 {
283 std::cerr << "enter: laqps" << std::endl;
284 using std::min;
285 typedef typename GeMatrix<MA>::ElementType ElementType;
286 typedef typename GeMatrix<MA>::IndexType IndexType;
287
288 # ifndef NDEBUG
289 //
290 // Test the input parameters
291 //
292 ASSERT(A.firstRow()==1);
293 ASSERT(A.firstCol()==1);
294 ASSERT(jPiv.firstIndex()==1);
295 ASSERT(tau.firstIndex()==1);
296 ASSERT(vn1.firstIndex()==1);
297 ASSERT(vn2.firstIndex()==1);
298 ASSERT(aux.firstIndex()==1);
299 ASSERT(F.firstRow()==1);
300 ASSERT(F.firstCol()==1);
301
302 const IndexType n = A.numCols();
303
304 ASSERT(jPiv.length()==n);
305 std::cerr << "tau.length() = " << tau.length() << std::endl;
306 std::cerr << "nb = " << nb << std::endl;
307 std::cerr << "kb = " << kb << std::endl;
308 ASSERT(tau.length()>=nb);
309 ASSERT(vn1.length()==n);
310 ASSERT(vn1.length()==n);
311 ASSERT(aux.length()==nb);
312 ASSERT(F.numRows()==n);
313 ASSERT(F.numCols()==nb);
314 # endif
315
316 # ifdef CHECK_CXXLAPACK
317 //
318 // Make copies of output arguments
319 //
320 typename GeMatrix<MA>::NoView A_org = A;
321 typename DenseVector<JPIV>::NoView jPiv_org = jPiv;
322 typename DenseVector<VTAU>::NoView tau_org = tau;
323 typename DenseVector<VN1>::NoView vn1_org = vn1;
324 typename DenseVector<VN2>::NoView vn2_org = vn2;
325 typename DenseVector<VAUX>::NoView aux_org = aux;
326 typename GeMatrix<MF>::NoView F_org = F;
327 # endif
328
329 //
330 // Call implementation
331 //
332 laqps_generic(offset, nb, kb, A, jPiv, tau, vn1, vn2, aux, F);
333
334 # ifdef CHECK_CXXLAPACK
335 //
336 // Restore output arguments
337 //
338 typename GeMatrix<MA>::NoView A_generic = A;
339 typename DenseVector<JPIV>::NoView jPiv_generic = jPiv;
340 typename DenseVector<VTAU>::NoView tau_generic = tau;
341 typename DenseVector<VN1>::NoView vn1_generic = vn1;
342 typename DenseVector<VN2>::NoView vn2_generic = vn2;
343 typename DenseVector<VAUX>::NoView aux_generic = aux;
344 typename GeMatrix<MF>::NoView F_generic = F;
345
346 A = A_org;
347 jPiv = jPiv_org;
348 tau = tau_org;
349 vn1 = vn1_org;
350 vn2 = vn2_org;
351 aux = aux_org;
352 F = F_org;
353
354 //
355 // Compare results
356 //
357 laqps_native(offset, nb, kb, A, jPiv, tau, vn1, vn2, aux, F);
358
359 bool failed = false;
360 if (! isIdentical(A_generic, A, "A_generic", "A")) {
361 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
362 std::cerr << "F77LAPACK: A = " << A << std::endl;
363 failed = true;
364 }
365
366 if (! isIdentical(jPiv_generic, jPiv, "jPiv_generic", "jPiv")) {
367 std::cerr << "CXXLAPACK: jPiv_generic = " << jPiv_generic << std::endl;
368 std::cerr << "F77LAPACK: jPiv = " << jPiv << std::endl;
369 failed = true;
370 }
371
372 if (! isIdentical(tau_generic, tau, "tau_generic", "tau")) {
373 std::cerr << "CXXLAPACK: tau_generic = " << tau_generic << std::endl;
374 std::cerr << "F77LAPACK: tau = " << tau << std::endl;
375 failed = true;
376 }
377
378 if (! isIdentical(vn1_generic, vn1, "vn1_generic", "vn1")) {
379 std::cerr << "CXXLAPACK: vn1_generic = " << vn1_generic << std::endl;
380 std::cerr << "F77LAPACK: vn1 = " << vn1 << std::endl;
381 failed = true;
382 }
383
384 if (! isIdentical(vn2_generic, vn2, "vn2_generic", "vn2")) {
385 std::cerr << "CXXLAPACK: vn2_generic = " << vn2_generic << std::endl;
386 std::cerr << "F77LAPACK: vn2 = " << vn2 << std::endl;
387 failed = true;
388 }
389
390 if (! isIdentical(aux_generic, aux, "aux_generic", "aux")) {
391 std::cerr << "CXXLAPACK: aux_generic = " << aux_generic << std::endl;
392 std::cerr << "F77LAPACK: aux = " << aux << std::endl;
393 failed = true;
394 }
395
396 if (! isIdentical(F_generic, F, "F_generic", "F")) {
397 std::cerr << "CXXLAPACK: F_generic = " << F_generic << std::endl;
398 std::cerr << "F77LAPACK: F = " << F << std::endl;
399 failed = true;
400 }
401
402 if (failed) {
403 ASSERT(0);
404 }
405 # endif
406
407 std::cerr << "leave: laqps" << std::endl;
408 }
409
410 //-- forwarding ----------------------------------------------------------------
411 template <typename MA, typename JPIV, typename VTAU,
412 typename VN1, typename VN2, typename VAUX,
413 typename MF>
414 void
415 laqps(typename MA::IndexType offset,
416 typename MA::IndexType nb,
417 typename MA::IndexType &kb,
418 MA &&A,
419 JPIV &&jPiv,
420 VTAU &&tau,
421 VN1 &&vn1,
422 VN2 &&vn2,
423 VAUX &&aux,
424 MF &&F)
425 {
426 CHECKPOINT_ENTER;
427 laqps(offset, nb, kb, A, jPiv, tau, vn1, vn2, aux, F);
428 CHECKPOINT_LEAVE;
429 }
430
431 } } // namespace lapack, flens
432
433 #endif // FLENS_LAPACK_QR_LAQPS_TCC
2 * Copyright (c) 2012, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DLAQPS( M, N, OFFSET, NB, KB, A, LDA, JPVT, TAU, VN1,
36 $ VN2, AUXV, F, LDF )
37 *
38 * -- LAPACK auxiliary routine (version 3.3.1) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * -- April 2011 --
42 */
43
44 #ifndef FLENS_LAPACK_QR_LAQPS_TCC
45 #define FLENS_LAPACK_QR_LAQPS_TCC 1
46
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 namespace flens { namespace lapack {
51
52 //== generic lapack implementation =============================================
53
54 template <typename MA, typename JPIV, typename VTAU,
55 typename VN1, typename VN2, typename VAUX,
56 typename MF>
57 void
58 laqps_generic(typename GeMatrix<MA>::IndexType offset,
59 typename GeMatrix<MA>::IndexType nb,
60 typename GeMatrix<MA>::IndexType &kb,
61 GeMatrix<MA> &A,
62 DenseVector<JPIV> &jPiv,
63 DenseVector<VTAU> &tau,
64 DenseVector<VN1> &vn1,
65 DenseVector<VN2> &vn2,
66 DenseVector<VAUX> &aux,
67 GeMatrix<MF> &F)
68 {
69 using std::abs;
70 using std::max;
71 using std::min;
72 using std::sqrt;
73 using std::swap;
74
75 typedef typename GeMatrix<MA>::ElementType ElementType;
76 typedef typename GeMatrix<MA>::IndexType IndexType;
77
78 const Underscore<IndexType> _;
79
80 const IndexType m = A.numRows();
81 const IndexType n = A.numCols();
82
83 const IndexType lastRk = min(m, n+offset);
84
85 IndexType lasticc = 0;
86 IndexType k = 0;
87
88 const ElementType Zero(0), One(1);
89 const ElementType tol3z = sqrt(lamch<ElementType>(Eps));
90 //
91 // Beginning of while loop.
92 //
93 while (k<nb && lasticc==0) {
94 ++k;
95
96 const IndexType rk = offset + k;
97 //
98 // Determine ith pivot column and swap if necessary
99 //
100 IndexType pvt = (k-1) + blas::iamax(vn1(_(k,n)));
101 if (pvt!=k) {
102 blas::swap(A(_,pvt), A(_,k));
103 blas::swap(F(pvt,_(1,k-1)), F(k,_(1,k-1)));
104 swap(jPiv(pvt),jPiv(k));
105 vn1(pvt) = vn1(k);
106 vn2(pvt) = vn2(k);
107 }
108 //
109 // Apply previous Householder reflectors to column K:
110 // A(RK:M,K) := A(RK:M,K) - A(RK:M,1:K-1)*F(K,1:K-1)**T.
111 //
112 if (k>1) {
113 blas::mv(NoTrans, -One, A(_(rk,m),_(1,k-1)), F(k,_(1,k-1)),
114 One, A(_(rk,m),k));
115 }
116 //
117 // Generate elementary reflector H(k).
118 //
119 if (rk<m) {
120 larfg(m-rk+1, A(rk,k), A(_(rk+1,m),k), tau(k));
121 } else {
122 larfg(1, A(m,k), A(_(m+1,m),k), tau(k));
123 }
124
125 const ElementType Akk = A(rk,k);
126 A(rk,k) = One;
127 //
128 // Compute Kth column of F:
129 //
130 // Compute F(K+1:N,K) := tau(K)*A(RK:M,K+1:N)**T*A(RK:M,K).
131 //
132 if (k<n) {
133 blas::mv(Trans, tau(k), A(_(rk,m),_(k+1,n)), A(_(rk,m),k),
134 Zero, F(_(k+1,n),k));
135 }
136 //
137 // Padding F(1:K,K) with zeros.
138 //
139 F(_(1,k),k) = Zero;
140 //
141 // Incremental updating of F:
142 // F(1:N,K) := F(1:N,K) - tau(K)*F(1:N,1:K-1)*A(RK:M,1:K-1)**T
143 // *A(RK:M,K).
144 //
145 if (k>1) {
146 blas::mv(Trans, -tau(k), A(_(rk,m),_(1,k-1)), A(_(rk,m),k),
147 Zero, aux(_(1,k-1)));
148 blas::mv(NoTrans, One, F(_,_(1,k-1)), aux(_(1,k-1)),
149 One, F(_,k));
150 }
151 //
152 // Update the current row of A:
153 // A(RK,K+1:N) := A(RK,K+1:N) - A(RK,1:K)*F(K+1:N,1:K)**T.
154 //
155 if (k<n) {
156 blas::mv(NoTrans, -One, F(_(k+1,n),_(1,k)), A(rk,_(1,k)),
157 One, A(rk,_(k+1,n)));
158 }
159 //
160 // Update partial column norms.
161 //
162 if (rk<lastRk) {
163 for (IndexType j=k+1; j<=n; ++j) {
164 if (vn1(j)!=Zero) {
165 //
166 // NOTE: The following 4 lines follow from the analysis in
167 // Lapack Working Note 176.
168 //
169 ElementType tmp = abs(A(rk,j)) / vn1(j);
170 tmp = max(Zero, (One+tmp)*(One-tmp));
171 ElementType tmp2 = tmp*pow(vn1(j)/vn2(j),2);
172 if (tmp2<=tol3z) {
173 vn2(j) = ElementType(lasticc);
174 lasticc = j;
175 } else {
176 vn1(j) *= sqrt(tmp);
177 }
178 }
179 }
180 }
181
182 A(rk,k) = Akk;
183 //
184 // End of while loop.
185 //
186 }
187 kb = k;
188 const IndexType rk = offset + kb;
189 //
190 // Apply the block reflector to the rest of the matrix:
191 // A(OFFSET+KB+1:M,KB+1:N) := A(OFFSET+KB+1:M,KB+1:N) -
192 // A(OFFSET+KB+1:M,1:KB)*F(KB+1:N,1:KB)**T.
193 //
194 if (kb<min(n,m-offset)) {
195 blas::mm(NoTrans, Trans,
196 -One, A(_(rk+1,m),_(1,kb)), F(_(kb+1,n),_(1,kb)),
197 One, A(_(rk+1,m),_(kb+1,n)));
198 }
199 //
200 // Recomputation of difficult columns.
201 //
202 while (lasticc>0) {
203 IndexType iTmp = nint(vn2(lasticc));
204 vn1(lasticc) = blas::nrm2(A(_(rk+1,m),lasticc));
205 //
206 // NOTE: The computation of VN1( LSTICC ) relies on the fact that
207 // SNRM2 does not fail on vectors with norm below the value of
208 // SQRT(DLAMCH('S'))
209 //
210 vn2(lasticc) = vn1(lasticc);
211 lasticc = iTmp;
212 }
213 }
214
215 //== interface for native lapack ===============================================
216
217 #ifdef CHECK_CXXLAPACK
218
219 template <typename MA, typename JPIV, typename VTAU,
220 typename VN1, typename VN2, typename VAUX,
221 typename MF>
222 void
223 laqps_native(typename GeMatrix<MA>::IndexType offset,
224 typename GeMatrix<MA>::IndexType nb,
225 typename GeMatrix<MA>::IndexType &kb,
226 GeMatrix<MA> &A,
227 DenseVector<JPIV> &jPiv,
228 DenseVector<VTAU> &tau,
229 DenseVector<VN1> &vn1,
230 DenseVector<VN2> &vn2,
231 DenseVector<VAUX> &aux,
232 GeMatrix<MF> &F)
233 {
234 typedef typename GeMatrix<MA>::ElementType T;
235
236 const INTEGER M = A.numRows();
237 const INTEGER N = A.numCols();
238 const INTEGER OFFSET = offset;
239 const INTEGER NB = nb;
240 INTEGER KB = kb;
241 const INTEGER LDA = A.leadingDimension();
242 const INTEGER LDF = F.leadingDimension();
243
244 if (IsSame<T, DOUBLE>::value) {
245 LAPACK_DECL(dlaqps)(&M,
246 &N,
247 &OFFSET,
248 &NB,
249 &KB,
250 A.data(),
251 &LDA,
252 jPiv.data(),
253 tau.data(),
254 vn1.data(),
255 vn2.data(),
256 aux.data(),
257 F.data(),
258 &LDF);
259 kb = KB;
260 } else {
261 ASSERT(0);
262 }
263 }
264
265 #endif // CHECK_CXXLAPACK
266
267 //== public interface ==========================================================
268 template <typename MA, typename JPIV, typename VTAU,
269 typename VN1, typename VN2, typename VAUX,
270 typename MF>
271 void
272 laqps(typename GeMatrix<MA>::IndexType offset,
273 typename GeMatrix<MA>::IndexType nb,
274 typename GeMatrix<MA>::IndexType &kb,
275 GeMatrix<MA> &A,
276 DenseVector<JPIV> &jPiv,
277 DenseVector<VTAU> &tau,
278 DenseVector<VN1> &vn1,
279 DenseVector<VN2> &vn2,
280 DenseVector<VAUX> &aux,
281 GeMatrix<MF> &F)
282 {
283 std::cerr << "enter: laqps" << std::endl;
284 using std::min;
285 typedef typename GeMatrix<MA>::ElementType ElementType;
286 typedef typename GeMatrix<MA>::IndexType IndexType;
287
288 # ifndef NDEBUG
289 //
290 // Test the input parameters
291 //
292 ASSERT(A.firstRow()==1);
293 ASSERT(A.firstCol()==1);
294 ASSERT(jPiv.firstIndex()==1);
295 ASSERT(tau.firstIndex()==1);
296 ASSERT(vn1.firstIndex()==1);
297 ASSERT(vn2.firstIndex()==1);
298 ASSERT(aux.firstIndex()==1);
299 ASSERT(F.firstRow()==1);
300 ASSERT(F.firstCol()==1);
301
302 const IndexType n = A.numCols();
303
304 ASSERT(jPiv.length()==n);
305 std::cerr << "tau.length() = " << tau.length() << std::endl;
306 std::cerr << "nb = " << nb << std::endl;
307 std::cerr << "kb = " << kb << std::endl;
308 ASSERT(tau.length()>=nb);
309 ASSERT(vn1.length()==n);
310 ASSERT(vn1.length()==n);
311 ASSERT(aux.length()==nb);
312 ASSERT(F.numRows()==n);
313 ASSERT(F.numCols()==nb);
314 # endif
315
316 # ifdef CHECK_CXXLAPACK
317 //
318 // Make copies of output arguments
319 //
320 typename GeMatrix<MA>::NoView A_org = A;
321 typename DenseVector<JPIV>::NoView jPiv_org = jPiv;
322 typename DenseVector<VTAU>::NoView tau_org = tau;
323 typename DenseVector<VN1>::NoView vn1_org = vn1;
324 typename DenseVector<VN2>::NoView vn2_org = vn2;
325 typename DenseVector<VAUX>::NoView aux_org = aux;
326 typename GeMatrix<MF>::NoView F_org = F;
327 # endif
328
329 //
330 // Call implementation
331 //
332 laqps_generic(offset, nb, kb, A, jPiv, tau, vn1, vn2, aux, F);
333
334 # ifdef CHECK_CXXLAPACK
335 //
336 // Restore output arguments
337 //
338 typename GeMatrix<MA>::NoView A_generic = A;
339 typename DenseVector<JPIV>::NoView jPiv_generic = jPiv;
340 typename DenseVector<VTAU>::NoView tau_generic = tau;
341 typename DenseVector<VN1>::NoView vn1_generic = vn1;
342 typename DenseVector<VN2>::NoView vn2_generic = vn2;
343 typename DenseVector<VAUX>::NoView aux_generic = aux;
344 typename GeMatrix<MF>::NoView F_generic = F;
345
346 A = A_org;
347 jPiv = jPiv_org;
348 tau = tau_org;
349 vn1 = vn1_org;
350 vn2 = vn2_org;
351 aux = aux_org;
352 F = F_org;
353
354 //
355 // Compare results
356 //
357 laqps_native(offset, nb, kb, A, jPiv, tau, vn1, vn2, aux, F);
358
359 bool failed = false;
360 if (! isIdentical(A_generic, A, "A_generic", "A")) {
361 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
362 std::cerr << "F77LAPACK: A = " << A << std::endl;
363 failed = true;
364 }
365
366 if (! isIdentical(jPiv_generic, jPiv, "jPiv_generic", "jPiv")) {
367 std::cerr << "CXXLAPACK: jPiv_generic = " << jPiv_generic << std::endl;
368 std::cerr << "F77LAPACK: jPiv = " << jPiv << std::endl;
369 failed = true;
370 }
371
372 if (! isIdentical(tau_generic, tau, "tau_generic", "tau")) {
373 std::cerr << "CXXLAPACK: tau_generic = " << tau_generic << std::endl;
374 std::cerr << "F77LAPACK: tau = " << tau << std::endl;
375 failed = true;
376 }
377
378 if (! isIdentical(vn1_generic, vn1, "vn1_generic", "vn1")) {
379 std::cerr << "CXXLAPACK: vn1_generic = " << vn1_generic << std::endl;
380 std::cerr << "F77LAPACK: vn1 = " << vn1 << std::endl;
381 failed = true;
382 }
383
384 if (! isIdentical(vn2_generic, vn2, "vn2_generic", "vn2")) {
385 std::cerr << "CXXLAPACK: vn2_generic = " << vn2_generic << std::endl;
386 std::cerr << "F77LAPACK: vn2 = " << vn2 << std::endl;
387 failed = true;
388 }
389
390 if (! isIdentical(aux_generic, aux, "aux_generic", "aux")) {
391 std::cerr << "CXXLAPACK: aux_generic = " << aux_generic << std::endl;
392 std::cerr << "F77LAPACK: aux = " << aux << std::endl;
393 failed = true;
394 }
395
396 if (! isIdentical(F_generic, F, "F_generic", "F")) {
397 std::cerr << "CXXLAPACK: F_generic = " << F_generic << std::endl;
398 std::cerr << "F77LAPACK: F = " << F << std::endl;
399 failed = true;
400 }
401
402 if (failed) {
403 ASSERT(0);
404 }
405 # endif
406
407 std::cerr << "leave: laqps" << std::endl;
408 }
409
410 //-- forwarding ----------------------------------------------------------------
411 template <typename MA, typename JPIV, typename VTAU,
412 typename VN1, typename VN2, typename VAUX,
413 typename MF>
414 void
415 laqps(typename MA::IndexType offset,
416 typename MA::IndexType nb,
417 typename MA::IndexType &kb,
418 MA &&A,
419 JPIV &&jPiv,
420 VTAU &&tau,
421 VN1 &&vn1,
422 VN2 &&vn2,
423 VAUX &&aux,
424 MF &&F)
425 {
426 CHECKPOINT_ENTER;
427 laqps(offset, nb, kb, A, jPiv, tau, vn1, vn2, aux, F);
428 CHECKPOINT_LEAVE;
429 }
430
431 } } // namespace lapack, flens
432
433 #endif // FLENS_LAPACK_QR_LAQPS_TCC