1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
36 $ WORK, LWORK, INFO )
37 *
38 * -- LAPACK routine (version 3.3.1) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * -- April 2011 --
42 */
43
44 #ifndef FLENS_LAPACK_LQ_ORMLQ_TCC
45 #define FLENS_LAPACK_LQ_ORMLQ_TCC 1
46
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 namespace flens { namespace lapack {
51
52 //== generic lapack implementation =============================================
53
54 template <typename MA, typename MC>
55 typename GeMatrix<MC>::IndexType
56 ormlq_generic_wsq(Side side,
57 Transpose trans,
58 GeMatrix<MA> &A,
59 GeMatrix<MC> &C)
60 {
61 using std::max;
62 using std::min;
63
64 typedef typename GeMatrix<MC>::ElementType T;
65 typedef typename GeMatrix<MC>::IndexType IndexType;
66
67 typedef typename GeMatrix<MC>::View GeView;
68 typedef typename GeView::Engine GeViewEngine;
69
70 //
71 // Paramter for maximum block size and buffer for TrMatrix Tr.
72 //
73 const IndexType nbMax = 64;
74
75 const IndexType m = C.numRows();
76 const IndexType n = C.numCols();
77 const IndexType k = A.numRows();
78
79 const IndexType nw = (side==Left) ? n : m;
80
81 //
82 // Determine the block size. nb may be at most nbMax, where nbMax
83 // is used to define the local array tr.
84 //
85 char opt[3];
86 opt[0] = char(side);
87 if (trans==NoTrans) {
88 opt[1] = 'N';
89 } else if (trans==Conj) {
90 opt[1] = 'R';
91 } else if (trans==Trans) {
92 opt[1] = 'T';
93 } else if (trans==ConjTrans) {
94 opt[1] = 'C';
95 }
96 opt[2] = 0;
97
98 IndexType nb = min(nbMax, IndexType(ilaenv<T>(1, "ORMLQ", opt, m, n, k)));
99 return max(IndexType(1), nw)*nb;
100 }
101
102 template <typename MA, typename VTAU, typename MC, typename VWORK>
103 void
104 ormlq_generic(Side side,
105 Transpose trans,
106 GeMatrix<MA> &A,
107 const DenseVector<VTAU> &tau,
108 GeMatrix<MC> &C,
109 DenseVector<VWORK> &work)
110 {
111 using std::max;
112 using std::min;
113
114 typedef typename GeMatrix<MC>::ElementType T;
115 typedef typename GeMatrix<MC>::IndexType IndexType;
116
117 typedef typename GeMatrix<MC>::View GeView;
118 typedef typename GeView::Engine GeViewEngine;
119
120 const Underscore<IndexType> _;
121
122
123 //
124 // Paramter for maximum block size and buffer for TrMatrix Tr.
125 //
126 const IndexType nbMax = 64;
127 const IndexType ldt = nbMax + 1;
128 T trBuffer[nbMax*ldt];
129
130 const IndexType m = C.numRows();
131 const IndexType n = C.numCols();
132 const IndexType k = A.numRows();
133
134 const bool noTrans = (trans==Trans || trans==ConjTrans) ? false : true;
135 //
136 // nq is the order of Q and nw is the minimum dimension of work
137 //
138 IndexType nq, nw;
139 if (side==Left) {
140 nq = m;
141 nw = n;
142 } else {
143 nq = n;
144 nw = m;
145 }
146 //
147 // Determine the block size. nb may be at most nbMax, where nbMax
148 // is used to define the local array tr.
149 //
150 char opt[3];
151 opt[0] = (side==Left) ? 'L' : 'R';
152 if (trans==NoTrans) {
153 opt[1] = 'N';
154 } else if (trans==Conj) {
155 opt[1] = 'R';
156 } else if (trans==Trans) {
157 opt[1] = 'T';
158 } else if (trans==ConjTrans) {
159 opt[1] = 'C';
160 }
161 opt[2] = 0;
162
163 IndexType nb = min(nbMax, IndexType(ilaenv<T>(1, "ORMLQ", opt, m, n, k)));
164 IndexType lWorkOpt = max(IndexType(1), nw)*nb;
165
166 if (work.length()==0) {
167 work.resize(lWorkOpt);
168 }
169
170 //
171 // Quick return if possible
172 //
173 if ((m==0) || (n==0) || (k==0)) {
174 work(1) = 1;
175 return;
176 }
177
178 IndexType nbMin = 2;
179 IndexType iws;
180 if ((nb>1) && (nb<k)) {
181 iws = lWorkOpt;
182 if (work.length()<iws) {
183 nb = work.length()/nw;
184 nbMin = max(nbMin, IndexType(ilaenv<T>(2, "ORMLQ", opt, m, n, k)));
185 }
186 } else {
187 iws = nw;
188 }
189
190 if ((nb<nbMin) || (nb>=k)) {
191 //
192 // Use unblocked code
193 //
194 auto _work = (side==Left) ? work(_(1,n)) : work(_(1,m));
195 orml2(side, trans, A, tau, C, _work);
196 } else {
197 //
198 // Use blocked code
199 //
200 IndexType iBeg, iInc, iEnd;
201 if ((side==Left && noTrans) || (side==Right && !noTrans)) {
202 iBeg = 1;
203 iEnd = ((k-1)/nb)*nb + 1;
204 iInc = nb;
205 } else {
206 iBeg = ((k-1)/nb)*nb + 1;
207 iEnd = 1;
208 iInc = -nb;
209 }
210 iEnd += iInc;
211
212 IndexType ic, jc;
213 if (side==Left) {
214 jc = 1;
215 } else {
216 ic = 1;
217 }
218
219 const Transpose transT = (trans==NoTrans) ? Trans : NoTrans;
220
221 typename GeMatrix<MA>::View Work(nw, nb, work);
222
223 for (IndexType i=iBeg; i!=iEnd; i+=iInc) {
224 const IndexType ib = min(nb, k-i+1);
225 GeView Tr = GeViewEngine(ib, ib, trBuffer, ldt);
226 //
227 // Form the triangular factor of the block reflector
228 // H = H(i) H(i+1) . . . H(i+ib-1)
229 //
230 larft(Forward, RowWise, nq-i+1,
231 A(_(i,i+ib-1),_(i,nq)), tau(_(i,i+ib-1)), Tr.upper());
232
233 if (side==Left) {
234 //
235 // H or H**T is applied to C(i:m,1:n)
236 //
237 ic = i;
238 } else {
239 //
240 // H or H**T is applied to C(1:m,i:n)
241 //
242 jc = i;
243 }
244
245 larfb(side, transT,
246 Forward, RowWise,
247 A(_(i,i+ib-1),_(i,nq)), Tr.upper(),
248 C(_(ic,m),_(jc,n)),
249 Work(_,_(1,ib)));
250 }
251 }
252 work(1) = lWorkOpt;
253 }
254
255 //== interface for native lapack ===============================================
256
257 #ifdef CHECK_CXXLAPACK
258
259 template <typename MA, typename MC>
260 typename GeMatrix<MC>::IndexType
261 ormlq_native_wsq(Side side,
262 Transpose trans,
263 GeMatrix<MA> &A,
264 GeMatrix<MC> &C)
265 {
266 typedef typename GeMatrix<MC>::ElementType T;
267
268 const char SIDE = char(side);
269 const char TRANS = getF77LapackChar(trans);
270 const INTEGER M = C.numRows();
271 const INTEGER N = C.numCols();
272 const INTEGER K = A.numRows();
273 const INTEGER LDA = A.leadingDimension();
274 const INTEGER LDC = C.leadingDimension();
275 T WORK, DUMMY;
276 const INTEGER LWORK = -1;
277 INTEGER INFO;
278
279 if (IsSame<T,DOUBLE>::value) {
280 LAPACK_IMPL(dormlq)(&SIDE,
281 &TRANS,
282 &M,
283 &N,
284 &K,
285 A.data(),
286 &LDA,
287 &DUMMY,
288 C.data(),
289 &LDC,
290 &WORK,
291 &LWORK,
292 &INFO);
293 } else {
294 ASSERT(0);
295 }
296
297 ASSERT(INFO>=0);
298 return WORK;
299 }
300
301 template <typename MA, typename VTAU, typename MC, typename VWORK>
302 void
303 ormlq_native(Side side,
304 Transpose trans,
305 GeMatrix<MA> &A,
306 const DenseVector<VTAU> &tau,
307 GeMatrix<MC> &C,
308 DenseVector<VWORK> &work)
309 {
310 typedef typename GeMatrix<MC>::ElementType T;
311
312 const char SIDE = char(side);
313 const char TRANS = getF77LapackChar(trans);
314 const INTEGER M = C.numRows();
315 const INTEGER N = C.numCols();
316 const INTEGER K = A.numRows();
317 const INTEGER LDA = A.leadingDimension();
318 const INTEGER LDC = C.leadingDimension();
319 const INTEGER LWORK = work.length();
320 INTEGER INFO;
321
322 if (IsSame<T,DOUBLE>::value) {
323 LAPACK_IMPL(dormlq)(&SIDE,
324 &TRANS,
325 &M,
326 &N,
327 &K,
328 A.data(),
329 &LDA,
330 tau.data(),
331 C.data(),
332 &LDC,
333 work.data(),
334 &LWORK,
335 &INFO);
336 } else {
337 ASSERT(0);
338 }
339
340 ASSERT(INFO>=0);
341 }
342
343 #endif // CHECK_CXXLAPACK
344
345 //== public interface ==========================================================
346
347 template <typename MA, typename MC>
348 typename GeMatrix<MC>::IndexType
349 ormlq_wsq(Side side,
350 Transpose trans,
351 GeMatrix<MA> &A,
352 GeMatrix<MC> &C)
353 {
354 typedef typename GeMatrix<MC>::IndexType IndexType;
355
356 //
357 // Test the input parameters
358 //
359 # ifndef NDEBUG
360 const IndexType m = C.numRows();
361 const IndexType n = C.numCols();
362 const IndexType k = A.numRows();
363
364 if (side==Left) {
365 ASSERT(A.numCols()==m);
366 } else {
367 ASSERT(A.numCols()==n);
368 }
369 # endif
370
371 //
372 // Call implementation
373 //
374 const IndexType info = ormlq_generic(side, trans, A, C);
375
376 # ifdef CHECK_CXXLAPACK
377 //
378 // Compare generic results with results from the native implementation
379 //
380 const IndexType _info = ormlq_native(side, trans, A, C);
381
382 ASSERT(info==_info);
383 # endif
384 return info;
385 }
386
387 template <typename MA, typename VTAU, typename MC, typename VWORK>
388 void
389 ormlq(Side side,
390 Transpose trans,
391 GeMatrix<MA> &A,
392 const DenseVector<VTAU> &tau,
393 GeMatrix<MC> &C,
394 DenseVector<VWORK> &work)
395 {
396 //
397 // Test the input parameters
398 //
399 # ifndef NDEBUG
400 typedef typename GeMatrix<MC>::IndexType IndexType;
401
402 const IndexType m = C.numRows();
403 const IndexType n = C.numCols();
404 const IndexType k = A.numRows();
405
406 ASSERT(tau.length()==k);
407
408 if (side==Left) {
409 ASSERT(A.numCols()==m);
410 } else {
411 ASSERT(A.numCols()==n);
412 }
413
414 if (work.length()>0) {
415 if (side==Left) {
416 ASSERT(work.length()>=n);
417 } else {
418 ASSERT(work.length()>=m);
419 }
420 }
421 # endif
422
423 //
424 // Make copies of output arguments
425 //
426 # ifdef CHECK_CXXLAPACK
427 typename GeMatrix<MA>::NoView A_org = A;
428 typename GeMatrix<MC>::NoView C_org = C;
429 typename DenseVector<VWORK>::NoView work_org = work;
430 # endif
431
432 //
433 // Call implementation
434 //
435 ormlq_generic(side, trans, A, tau, C, work);
436
437 # ifdef CHECK_CXXLAPACK
438 //
439 // Make copies of results computed by the generic implementation
440 //
441 typename GeMatrix<MA>::NoView A_generic = A;
442 typename GeMatrix<MC>::NoView C_generic = C;
443 typename DenseVector<VWORK>::NoView work_generic = work;
444
445 //
446 // restore output arguments
447 //
448 A = A_org;
449 C = C_org;
450 work = work_org;
451
452 //
453 // Compare generic results with results from the native implementation
454 //
455 ormlq_native(side, trans, A, tau, C, work);
456
457 bool failed = false;
458 if (! isIdentical(A_generic, A, "A_generic", "A")) {
459 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
460 std::cerr << "F77LAPACK: A = " << A << std::endl;
461 failed = true;
462 }
463 if (! isIdentical(C_generic, C, "C_generic", "C")) {
464 std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
465 std::cerr << "F77LAPACK: C = " << C << std::endl;
466 failed = true;
467 }
468 if (! isIdentical(work_generic, work, "work_generic", "work")) {
469 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
470 std::cerr << "F77LAPACK: work = " << work << std::endl;
471 failed = true;
472 }
473
474 if (failed) {
475 std::cerr << "error in: ormlq.tcc" << std::endl;
476 ASSERT(0);
477 } else {
478 // std::cerr << "passed: ormlq.tcc" << std::endl;
479 }
480 # endif
481 }
482
483 //-- forwarding ----------------------------------------------------------------
484 template <typename MA, typename MC>
485 typename MC::IndexType
486 ormlq_wsq(Side side,
487 Transpose trans,
488 MA &&A,
489 MC &&C)
490 {
491 typedef typename MC::IndexType IndexType;
492
493 CHECKPOINT_ENTER;
494 const IndexType info = ormlq_wsq(side, trans, A, C);
495 CHECKPOINT_LEAVE;
496
497 return info;
498 }
499
500 template <typename MA, typename VTAU, typename MC, typename VWORK>
501 void
502 ormlq(Side side,
503 Transpose trans,
504 MA &&A,
505 const VTAU &tau,
506 MC &&C,
507 VWORK &&work)
508 {
509 // TODO: asser that A is non-const
510 CHECKPOINT_ENTER;
511 ormlq(side, trans, A, tau, C, work);
512 CHECKPOINT_LEAVE;
513 }
514
515 } } // namespace lapack, flens
516
517 #endif // FLENS_LAPACK_LQ_ORMLQ_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 *
35 SUBROUTINE DORMLQ( SIDE, TRANS, M, N, K, A, LDA, TAU, C, LDC,
36 $ WORK, LWORK, INFO )
37 *
38 * -- LAPACK routine (version 3.3.1) --
39 * -- LAPACK is a software package provided by Univ. of Tennessee, --
40 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
41 * -- April 2011 --
42 */
43
44 #ifndef FLENS_LAPACK_LQ_ORMLQ_TCC
45 #define FLENS_LAPACK_LQ_ORMLQ_TCC 1
46
47 #include <flens/blas/blas.h>
48 #include <flens/lapack/lapack.h>
49
50 namespace flens { namespace lapack {
51
52 //== generic lapack implementation =============================================
53
54 template <typename MA, typename MC>
55 typename GeMatrix<MC>::IndexType
56 ormlq_generic_wsq(Side side,
57 Transpose trans,
58 GeMatrix<MA> &A,
59 GeMatrix<MC> &C)
60 {
61 using std::max;
62 using std::min;
63
64 typedef typename GeMatrix<MC>::ElementType T;
65 typedef typename GeMatrix<MC>::IndexType IndexType;
66
67 typedef typename GeMatrix<MC>::View GeView;
68 typedef typename GeView::Engine GeViewEngine;
69
70 //
71 // Paramter for maximum block size and buffer for TrMatrix Tr.
72 //
73 const IndexType nbMax = 64;
74
75 const IndexType m = C.numRows();
76 const IndexType n = C.numCols();
77 const IndexType k = A.numRows();
78
79 const IndexType nw = (side==Left) ? n : m;
80
81 //
82 // Determine the block size. nb may be at most nbMax, where nbMax
83 // is used to define the local array tr.
84 //
85 char opt[3];
86 opt[0] = char(side);
87 if (trans==NoTrans) {
88 opt[1] = 'N';
89 } else if (trans==Conj) {
90 opt[1] = 'R';
91 } else if (trans==Trans) {
92 opt[1] = 'T';
93 } else if (trans==ConjTrans) {
94 opt[1] = 'C';
95 }
96 opt[2] = 0;
97
98 IndexType nb = min(nbMax, IndexType(ilaenv<T>(1, "ORMLQ", opt, m, n, k)));
99 return max(IndexType(1), nw)*nb;
100 }
101
102 template <typename MA, typename VTAU, typename MC, typename VWORK>
103 void
104 ormlq_generic(Side side,
105 Transpose trans,
106 GeMatrix<MA> &A,
107 const DenseVector<VTAU> &tau,
108 GeMatrix<MC> &C,
109 DenseVector<VWORK> &work)
110 {
111 using std::max;
112 using std::min;
113
114 typedef typename GeMatrix<MC>::ElementType T;
115 typedef typename GeMatrix<MC>::IndexType IndexType;
116
117 typedef typename GeMatrix<MC>::View GeView;
118 typedef typename GeView::Engine GeViewEngine;
119
120 const Underscore<IndexType> _;
121
122
123 //
124 // Paramter for maximum block size and buffer for TrMatrix Tr.
125 //
126 const IndexType nbMax = 64;
127 const IndexType ldt = nbMax + 1;
128 T trBuffer[nbMax*ldt];
129
130 const IndexType m = C.numRows();
131 const IndexType n = C.numCols();
132 const IndexType k = A.numRows();
133
134 const bool noTrans = (trans==Trans || trans==ConjTrans) ? false : true;
135 //
136 // nq is the order of Q and nw is the minimum dimension of work
137 //
138 IndexType nq, nw;
139 if (side==Left) {
140 nq = m;
141 nw = n;
142 } else {
143 nq = n;
144 nw = m;
145 }
146 //
147 // Determine the block size. nb may be at most nbMax, where nbMax
148 // is used to define the local array tr.
149 //
150 char opt[3];
151 opt[0] = (side==Left) ? 'L' : 'R';
152 if (trans==NoTrans) {
153 opt[1] = 'N';
154 } else if (trans==Conj) {
155 opt[1] = 'R';
156 } else if (trans==Trans) {
157 opt[1] = 'T';
158 } else if (trans==ConjTrans) {
159 opt[1] = 'C';
160 }
161 opt[2] = 0;
162
163 IndexType nb = min(nbMax, IndexType(ilaenv<T>(1, "ORMLQ", opt, m, n, k)));
164 IndexType lWorkOpt = max(IndexType(1), nw)*nb;
165
166 if (work.length()==0) {
167 work.resize(lWorkOpt);
168 }
169
170 //
171 // Quick return if possible
172 //
173 if ((m==0) || (n==0) || (k==0)) {
174 work(1) = 1;
175 return;
176 }
177
178 IndexType nbMin = 2;
179 IndexType iws;
180 if ((nb>1) && (nb<k)) {
181 iws = lWorkOpt;
182 if (work.length()<iws) {
183 nb = work.length()/nw;
184 nbMin = max(nbMin, IndexType(ilaenv<T>(2, "ORMLQ", opt, m, n, k)));
185 }
186 } else {
187 iws = nw;
188 }
189
190 if ((nb<nbMin) || (nb>=k)) {
191 //
192 // Use unblocked code
193 //
194 auto _work = (side==Left) ? work(_(1,n)) : work(_(1,m));
195 orml2(side, trans, A, tau, C, _work);
196 } else {
197 //
198 // Use blocked code
199 //
200 IndexType iBeg, iInc, iEnd;
201 if ((side==Left && noTrans) || (side==Right && !noTrans)) {
202 iBeg = 1;
203 iEnd = ((k-1)/nb)*nb + 1;
204 iInc = nb;
205 } else {
206 iBeg = ((k-1)/nb)*nb + 1;
207 iEnd = 1;
208 iInc = -nb;
209 }
210 iEnd += iInc;
211
212 IndexType ic, jc;
213 if (side==Left) {
214 jc = 1;
215 } else {
216 ic = 1;
217 }
218
219 const Transpose transT = (trans==NoTrans) ? Trans : NoTrans;
220
221 typename GeMatrix<MA>::View Work(nw, nb, work);
222
223 for (IndexType i=iBeg; i!=iEnd; i+=iInc) {
224 const IndexType ib = min(nb, k-i+1);
225 GeView Tr = GeViewEngine(ib, ib, trBuffer, ldt);
226 //
227 // Form the triangular factor of the block reflector
228 // H = H(i) H(i+1) . . . H(i+ib-1)
229 //
230 larft(Forward, RowWise, nq-i+1,
231 A(_(i,i+ib-1),_(i,nq)), tau(_(i,i+ib-1)), Tr.upper());
232
233 if (side==Left) {
234 //
235 // H or H**T is applied to C(i:m,1:n)
236 //
237 ic = i;
238 } else {
239 //
240 // H or H**T is applied to C(1:m,i:n)
241 //
242 jc = i;
243 }
244
245 larfb(side, transT,
246 Forward, RowWise,
247 A(_(i,i+ib-1),_(i,nq)), Tr.upper(),
248 C(_(ic,m),_(jc,n)),
249 Work(_,_(1,ib)));
250 }
251 }
252 work(1) = lWorkOpt;
253 }
254
255 //== interface for native lapack ===============================================
256
257 #ifdef CHECK_CXXLAPACK
258
259 template <typename MA, typename MC>
260 typename GeMatrix<MC>::IndexType
261 ormlq_native_wsq(Side side,
262 Transpose trans,
263 GeMatrix<MA> &A,
264 GeMatrix<MC> &C)
265 {
266 typedef typename GeMatrix<MC>::ElementType T;
267
268 const char SIDE = char(side);
269 const char TRANS = getF77LapackChar(trans);
270 const INTEGER M = C.numRows();
271 const INTEGER N = C.numCols();
272 const INTEGER K = A.numRows();
273 const INTEGER LDA = A.leadingDimension();
274 const INTEGER LDC = C.leadingDimension();
275 T WORK, DUMMY;
276 const INTEGER LWORK = -1;
277 INTEGER INFO;
278
279 if (IsSame<T,DOUBLE>::value) {
280 LAPACK_IMPL(dormlq)(&SIDE,
281 &TRANS,
282 &M,
283 &N,
284 &K,
285 A.data(),
286 &LDA,
287 &DUMMY,
288 C.data(),
289 &LDC,
290 &WORK,
291 &LWORK,
292 &INFO);
293 } else {
294 ASSERT(0);
295 }
296
297 ASSERT(INFO>=0);
298 return WORK;
299 }
300
301 template <typename MA, typename VTAU, typename MC, typename VWORK>
302 void
303 ormlq_native(Side side,
304 Transpose trans,
305 GeMatrix<MA> &A,
306 const DenseVector<VTAU> &tau,
307 GeMatrix<MC> &C,
308 DenseVector<VWORK> &work)
309 {
310 typedef typename GeMatrix<MC>::ElementType T;
311
312 const char SIDE = char(side);
313 const char TRANS = getF77LapackChar(trans);
314 const INTEGER M = C.numRows();
315 const INTEGER N = C.numCols();
316 const INTEGER K = A.numRows();
317 const INTEGER LDA = A.leadingDimension();
318 const INTEGER LDC = C.leadingDimension();
319 const INTEGER LWORK = work.length();
320 INTEGER INFO;
321
322 if (IsSame<T,DOUBLE>::value) {
323 LAPACK_IMPL(dormlq)(&SIDE,
324 &TRANS,
325 &M,
326 &N,
327 &K,
328 A.data(),
329 &LDA,
330 tau.data(),
331 C.data(),
332 &LDC,
333 work.data(),
334 &LWORK,
335 &INFO);
336 } else {
337 ASSERT(0);
338 }
339
340 ASSERT(INFO>=0);
341 }
342
343 #endif // CHECK_CXXLAPACK
344
345 //== public interface ==========================================================
346
347 template <typename MA, typename MC>
348 typename GeMatrix<MC>::IndexType
349 ormlq_wsq(Side side,
350 Transpose trans,
351 GeMatrix<MA> &A,
352 GeMatrix<MC> &C)
353 {
354 typedef typename GeMatrix<MC>::IndexType IndexType;
355
356 //
357 // Test the input parameters
358 //
359 # ifndef NDEBUG
360 const IndexType m = C.numRows();
361 const IndexType n = C.numCols();
362 const IndexType k = A.numRows();
363
364 if (side==Left) {
365 ASSERT(A.numCols()==m);
366 } else {
367 ASSERT(A.numCols()==n);
368 }
369 # endif
370
371 //
372 // Call implementation
373 //
374 const IndexType info = ormlq_generic(side, trans, A, C);
375
376 # ifdef CHECK_CXXLAPACK
377 //
378 // Compare generic results with results from the native implementation
379 //
380 const IndexType _info = ormlq_native(side, trans, A, C);
381
382 ASSERT(info==_info);
383 # endif
384 return info;
385 }
386
387 template <typename MA, typename VTAU, typename MC, typename VWORK>
388 void
389 ormlq(Side side,
390 Transpose trans,
391 GeMatrix<MA> &A,
392 const DenseVector<VTAU> &tau,
393 GeMatrix<MC> &C,
394 DenseVector<VWORK> &work)
395 {
396 //
397 // Test the input parameters
398 //
399 # ifndef NDEBUG
400 typedef typename GeMatrix<MC>::IndexType IndexType;
401
402 const IndexType m = C.numRows();
403 const IndexType n = C.numCols();
404 const IndexType k = A.numRows();
405
406 ASSERT(tau.length()==k);
407
408 if (side==Left) {
409 ASSERT(A.numCols()==m);
410 } else {
411 ASSERT(A.numCols()==n);
412 }
413
414 if (work.length()>0) {
415 if (side==Left) {
416 ASSERT(work.length()>=n);
417 } else {
418 ASSERT(work.length()>=m);
419 }
420 }
421 # endif
422
423 //
424 // Make copies of output arguments
425 //
426 # ifdef CHECK_CXXLAPACK
427 typename GeMatrix<MA>::NoView A_org = A;
428 typename GeMatrix<MC>::NoView C_org = C;
429 typename DenseVector<VWORK>::NoView work_org = work;
430 # endif
431
432 //
433 // Call implementation
434 //
435 ormlq_generic(side, trans, A, tau, C, work);
436
437 # ifdef CHECK_CXXLAPACK
438 //
439 // Make copies of results computed by the generic implementation
440 //
441 typename GeMatrix<MA>::NoView A_generic = A;
442 typename GeMatrix<MC>::NoView C_generic = C;
443 typename DenseVector<VWORK>::NoView work_generic = work;
444
445 //
446 // restore output arguments
447 //
448 A = A_org;
449 C = C_org;
450 work = work_org;
451
452 //
453 // Compare generic results with results from the native implementation
454 //
455 ormlq_native(side, trans, A, tau, C, work);
456
457 bool failed = false;
458 if (! isIdentical(A_generic, A, "A_generic", "A")) {
459 std::cerr << "CXXLAPACK: A_generic = " << A_generic << std::endl;
460 std::cerr << "F77LAPACK: A = " << A << std::endl;
461 failed = true;
462 }
463 if (! isIdentical(C_generic, C, "C_generic", "C")) {
464 std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
465 std::cerr << "F77LAPACK: C = " << C << std::endl;
466 failed = true;
467 }
468 if (! isIdentical(work_generic, work, "work_generic", "work")) {
469 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
470 std::cerr << "F77LAPACK: work = " << work << std::endl;
471 failed = true;
472 }
473
474 if (failed) {
475 std::cerr << "error in: ormlq.tcc" << std::endl;
476 ASSERT(0);
477 } else {
478 // std::cerr << "passed: ormlq.tcc" << std::endl;
479 }
480 # endif
481 }
482
483 //-- forwarding ----------------------------------------------------------------
484 template <typename MA, typename MC>
485 typename MC::IndexType
486 ormlq_wsq(Side side,
487 Transpose trans,
488 MA &&A,
489 MC &&C)
490 {
491 typedef typename MC::IndexType IndexType;
492
493 CHECKPOINT_ENTER;
494 const IndexType info = ormlq_wsq(side, trans, A, C);
495 CHECKPOINT_LEAVE;
496
497 return info;
498 }
499
500 template <typename MA, typename VTAU, typename MC, typename VWORK>
501 void
502 ormlq(Side side,
503 Transpose trans,
504 MA &&A,
505 const VTAU &tau,
506 MC &&C,
507 VWORK &&work)
508 {
509 // TODO: asser that A is non-const
510 CHECKPOINT_ENTER;
511 ormlq(side, trans, A, tau, C, work);
512 CHECKPOINT_LEAVE;
513 }
514
515 } } // namespace lapack, flens
516
517 #endif // FLENS_LAPACK_LQ_ORMLQ_TCC