1 /*
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 SUBROUTINE DLARFX( SIDE, M, N, V, TAU, C, LDC, WORK )
35 IMPLICIT NONE
36 *
37 * -- LAPACK auxiliary routine (version 3.3.1) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * -- April 2011 --
41 */
42
43 #ifndef FLENS_LAPACK_AUX_LARFX_TCC
44 #define FLENS_LAPACK_AUX_LARFX_TCC 1
45
46 #include <flens/blas/blas.h>
47 #include <flens/lapack/lapack.h>
48
49 namespace flens { namespace lapack {
50
51 //== generic lapack implementation =============================================
52
53 template <typename VV, typename TAU, typename MC, typename VWORK>
54 void
55 larfx_generic(Side side, const DenseVector<VV> &v, const TAU &tau,
56 GeMatrix<MC> &C, DenseVector<VWORK> &work)
57 {
58 typedef typename GeMatrix<MC>::IndexType IndexType;
59 typedef typename GeMatrix<MC>::ElementType T;
60
61 const IndexType m = C.numRows();
62 const IndexType n = C.numCols();
63
64 const T Zero(0), One(1);
65
66 if (tau==Zero) {
67 return;
68 }
69 if (side==Left) {
70 //
71 // Form H * C, where H has order m.
72 //
73 switch(m) {
74 //
75 // Special code for 1 x 1 Householder
76 //
77 case 1:
78 {
79 const T tmp = One - tau*v(1)*v(1);
80 for (IndexType j=1; j<=n; ++j) {
81 C(1,j) *= tmp;
82 }
83 }
84 return;
85 //
86 // Special code for 2 x 2 Householder
87 //
88 case 2:
89 {
90 const T v1 = v(1);
91 const T t1 = tau*v1;
92 const T v2 = v(2);
93 const T t2 = tau*v2;
94 for (IndexType j=1; j<=n; ++j) {
95 const T sum = v1*C(1,j) + v2*C(2,j);
96 C(1,j) -= sum*t1;
97 C(2,j) -= sum*t2;
98 }
99 }
100 return;
101 //
102 // Special code for 3 x 3 Householder
103 //
104 case 3:
105 {
106 const T v1 = v(1);
107 const T t1 = tau*v1;
108 const T v2 = v(2);
109 const T t2 = tau*v2;
110 const T v3 = v(3);
111 const T t3 = tau*v3;
112 for (IndexType j=1; j<=n; ++j) {
113 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j);
114 C(1,j) -= sum*t1;
115 C(2,j) -= sum*t2;
116 C(3,j) -= sum*t3;
117 }
118 }
119 return;
120 //
121 // Special code for 4 x 4 Householder
122 //
123 case 4:
124 {
125 const T v1 = v(1);
126 const T t1 = tau*v1;
127 const T v2 = v(2);
128 const T t2 = tau*v2;
129 const T v3 = v(3);
130 const T t3 = tau*v3;
131 const T v4 = v(4);
132 const T t4 = tau*v4;
133 for (IndexType j=1; j<=n; ++j) {
134 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j);
135 C(1,j) -= sum*t1;
136 C(2,j) -= sum*t2;
137 C(3,j) -= sum*t3;
138 C(4,j) -= sum*t4;
139 }
140 }
141 return;
142 //
143 // Special code for 5 x 5 Householder
144 //
145 case 5:
146 {
147 const T v1 = v(1);
148 const T t1 = tau*v1;
149 const T v2 = v(2);
150 const T t2 = tau*v2;
151 const T v3 = v(3);
152 const T t3 = tau*v3;
153 const T v4 = v(4);
154 const T t4 = tau*v4;
155 const T v5 = v(5);
156 const T t5 = tau*v5;
157 for (IndexType j=1; j<=n; ++j) {
158 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j)
159 + v5*C(5,j);
160 C(1,j) -= sum*t1;
161 C(2,j) -= sum*t2;
162 C(3,j) -= sum*t3;
163 C(4,j) -= sum*t4;
164 C(5,j) -= sum*t5;
165 }
166 }
167 return;
168 //
169 // Special code for 6 x 6 Householder
170 //
171 case 6:
172 {
173 const T v1 = v(1);
174 const T t1 = tau*v1;
175 const T v2 = v(2);
176 const T t2 = tau*v2;
177 const T v3 = v(3);
178 const T t3 = tau*v3;
179 const T v4 = v(4);
180 const T t4 = tau*v4;
181 const T v5 = v(5);
182 const T t5 = tau*v5;
183 const T v6 = v(6);
184 const T t6 = tau*v6;
185 for (IndexType j=1; j<=n; ++j) {
186 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j)
187 + v5*C(5,j) + v6*C(6,j);
188 C(1,j) -= sum*t1;
189 C(2,j) -= sum*t2;
190 C(3,j) -= sum*t3;
191 C(4,j) -= sum*t4;
192 C(5,j) -= sum*t5;
193 C(6,j) -= sum*t6;
194 }
195 }
196 return;
197 //
198 // Special code for 7 x 7 Householder
199 //
200 case 7:
201 {
202 const T v1 = v(1);
203 const T t1 = tau*v1;
204 const T v2 = v(2);
205 const T t2 = tau*v2;
206 const T v3 = v(3);
207 const T t3 = tau*v3;
208 const T v4 = v(4);
209 const T t4 = tau*v4;
210 const T v5 = v(5);
211 const T t5 = tau*v5;
212 const T v6 = v(6);
213 const T t6 = tau*v6;
214 const T v7 = v(7);
215 const T t7 = tau*v7;
216 for (IndexType j=1; j<=n; ++j) {
217 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j)
218 + v5*C(5,j) + v6*C(6,j) + v7*C(7,j);
219 C(1,j) -= sum*t1;
220 C(2,j) -= sum*t2;
221 C(3,j) -= sum*t3;
222 C(4,j) -= sum*t4;
223 C(5,j) -= sum*t5;
224 C(6,j) -= sum*t6;
225 C(7,j) -= sum*t7;
226 }
227 }
228 return;
229 //
230 // Special code for 8 x 8 Householder
231 //
232 case 8:
233 {
234 const T v1 = v(1);
235 const T t1 = tau*v1;
236 const T v2 = v(2);
237 const T t2 = tau*v2;
238 const T v3 = v(3);
239 const T t3 = tau*v3;
240 const T v4 = v(4);
241 const T t4 = tau*v4;
242 const T v5 = v(5);
243 const T t5 = tau*v5;
244 const T v6 = v(6);
245 const T t6 = tau*v6;
246 const T v7 = v(7);
247 const T t7 = tau*v7;
248 const T v8 = v(8);
249 const T t8 = tau*v8;
250 for (IndexType j=1; j<=n; ++j) {
251 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j)
252 + v5*C(5,j) + v6*C(6,j) + v7*C(7,j) + v8*C(8,j);
253 C(1,j) -= sum*t1;
254 C(2,j) -= sum*t2;
255 C(3,j) -= sum*t3;
256 C(4,j) -= sum*t4;
257 C(5,j) -= sum*t5;
258 C(6,j) -= sum*t6;
259 C(7,j) -= sum*t7;
260 C(8,j) -= sum*t8;
261 }
262 }
263 return;
264 //
265 // Special code for 9 x 9 Householder
266 //
267 case 9:
268 {
269 const T v1 = v(1);
270 const T t1 = tau*v1;
271 const T v2 = v(2);
272 const T t2 = tau*v2;
273 const T v3 = v(3);
274 const T t3 = tau*v3;
275 const T v4 = v(4);
276 const T t4 = tau*v4;
277 const T v5 = v(5);
278 const T t5 = tau*v5;
279 const T v6 = v(6);
280 const T t6 = tau*v6;
281 const T v7 = v(7);
282 const T t7 = tau*v7;
283 const T v8 = v(8);
284 const T t8 = tau*v8;
285 const T v9 = v(9);
286 const T t9 = tau*v9;
287 for (IndexType j=1; j<=n; ++j) {
288 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j)
289 + v5*C(5,j) + v6*C(6,j) + v7*C(7,j) + v8*C(8,j)
290 + v9*C(9,j);
291 C(1,j) -= sum*t1;
292 C(2,j) -= sum*t2;
293 C(3,j) -= sum*t3;
294 C(4,j) -= sum*t4;
295 C(5,j) -= sum*t5;
296 C(6,j) -= sum*t6;
297 C(7,j) -= sum*t7;
298 C(8,j) -= sum*t8;
299 C(9,j) -= sum*t9;
300 }
301 }
302 return;
303 //
304 // Special code for 10 x 10 Householder
305 //
306 case 10:
307 {
308 const T v1 = v(1);
309 const T t1 = tau*v1;
310 const T v2 = v(2);
311 const T t2 = tau*v2;
312 const T v3 = v(3);
313 const T t3 = tau*v3;
314 const T v4 = v(4);
315 const T t4 = tau*v4;
316 const T v5 = v(5);
317 const T t5 = tau*v5;
318 const T v6 = v(6);
319 const T t6 = tau*v6;
320 const T v7 = v(7);
321 const T t7 = tau*v7;
322 const T v8 = v(8);
323 const T t8 = tau*v8;
324 const T v9 = v(9);
325 const T t9 = tau*v9;
326 const T v10 = v(10);
327 const T t10 = tau*v10;
328 for (IndexType j=1; j<=n; ++j) {
329 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j)
330 + v5*C(5,j) + v6*C(6,j) + v7*C(7,j) + v8*C(8,j)
331 + v9*C(9,j) + v10*C(10,j);
332 C(1,j) -= sum*t1;
333 C(2,j) -= sum*t2;
334 C(3,j) -= sum*t3;
335 C(4,j) -= sum*t4;
336 C(5,j) -= sum*t5;
337 C(6,j) -= sum*t6;
338 C(7,j) -= sum*t7;
339 C(8,j) -= sum*t8;
340 C(9,j) -= sum*t9;
341 C(10,j) -= sum*t10;
342 }
343 }
344 return;
345 //
346 // Code for general M
347 //
348 default:
349 larf(side, v, tau, C, work);
350 return;
351 }
352 } else {
353 //
354 // Form C * H, where H has order n.
355 //
356 switch(n) {
357 //
358 // Special code for 1 x 1 Householder
359 //
360 case 1:
361 {
362 const T tmp = One - tau*v(1)*v(1);
363 for (IndexType i=1; i<=m; ++i) {
364 C(i,1) *= tmp;
365 }
366 }
367 return;
368 //
369 // Special code for 2 x 2 Householder
370 //
371 case 2:
372 {
373 const T v1 = v(1);
374 const T t1 = tau*v1;
375 const T v2 = v(2);
376 const T t2 = tau*v2;
377 for (IndexType i=1; i<=m; ++i) {
378 const T sum = v1*C(i,1) + v2*C(i,2);
379 C(i,1) -= sum*t1;
380 C(i,2) -= sum*t2;
381 }
382 }
383 return;
384 //
385 // Special code for 3 x 3 Householder
386 //
387 case 3:
388 {
389 const T v1 = v(1);
390 const T t1 = tau*v1;
391 const T v2 = v(2);
392 const T t2 = tau*v2;
393 const T v3 = v(3);
394 const T t3 = tau*v3;
395 for (IndexType i=1; i<=m; ++i) {
396 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3);
397 C(i,1) -= sum*t1;
398 C(i,2) -= sum*t2;
399 C(i,3) -= sum*t3;
400 }
401 }
402 return;
403 //
404 // Special code for 4 x 4 Householder
405 //
406 case 4:
407 {
408 const T v1 = v(1);
409 const T t1 = tau*v1;
410 const T v2 = v(2);
411 const T t2 = tau*v2;
412 const T v3 = v(3);
413 const T t3 = tau*v3;
414 const T v4 = v(4);
415 const T t4 = tau*v4;
416 for (IndexType i=1; i<=m; ++i) {
417 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4);
418 C(i,1) -= sum*t1;
419 C(i,2) -= sum*t2;
420 C(i,3) -= sum*t3;
421 C(i,4) -= sum*t4;
422 }
423 }
424 return;
425 //
426 // Special code for 5 x 5 Householder
427 //
428 case 5:
429 {
430 const T v1 = v(1);
431 const T t1 = tau*v1;
432 const T v2 = v(2);
433 const T t2 = tau*v2;
434 const T v3 = v(3);
435 const T t3 = tau*v3;
436 const T v4 = v(4);
437 const T t4 = tau*v4;
438 const T v5 = v(5);
439 const T t5 = tau*v5;
440 for (IndexType i=1; i<=m; ++i) {
441 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4)
442 + v5*C(i,5);
443 C(i,1) -= sum*t1;
444 C(i,2) -= sum*t2;
445 C(i,3) -= sum*t3;
446 C(i,4) -= sum*t4;
447 C(i,5) -= sum*t5;
448 }
449 }
450 return;
451 //
452 // Special code for 6 x 6 Householder
453 //
454 case 6:
455 {
456 const T v1 = v(1);
457 const T t1 = tau*v1;
458 const T v2 = v(2);
459 const T t2 = tau*v2;
460 const T v3 = v(3);
461 const T t3 = tau*v3;
462 const T v4 = v(4);
463 const T t4 = tau*v4;
464 const T v5 = v(5);
465 const T t5 = tau*v5;
466 const T v6 = v(6);
467 const T t6 = tau*v6;
468 for (IndexType i=1; i<=m; ++i) {
469 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4)
470 + v5*C(i,5) + v6*C(i,6);
471 C(i,1) -= sum*t1;
472 C(i,2) -= sum*t2;
473 C(i,3) -= sum*t3;
474 C(i,4) -= sum*t4;
475 C(i,5) -= sum*t5;
476 C(i,6) -= sum*t6;
477 }
478 }
479 return;
480 //
481 // Special code for 7 x 7 Householder
482 //
483 case 7:
484 {
485 const T v1 = v(1);
486 const T t1 = tau*v1;
487 const T v2 = v(2);
488 const T t2 = tau*v2;
489 const T v3 = v(3);
490 const T t3 = tau*v3;
491 const T v4 = v(4);
492 const T t4 = tau*v4;
493 const T v5 = v(5);
494 const T t5 = tau*v5;
495 const T v6 = v(6);
496 const T t6 = tau*v6;
497 const T v7 = v(7);
498 const T t7 = tau*v7;
499 for (IndexType i=1; i<=m; ++i) {
500 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4)
501 + v5*C(i,5) + v6*C(i,6) + v7*C(i,7);
502 C(i,1) -= sum*t1;
503 C(i,2) -= sum*t2;
504 C(i,3) -= sum*t3;
505 C(i,4) -= sum*t4;
506 C(i,5) -= sum*t5;
507 C(i,6) -= sum*t6;
508 C(i,7) -= sum*t7;
509 }
510 }
511 return;
512 //
513 // Special code for 8 x 8 Householder
514 //
515 case 8:
516 {
517 const T v1 = v(1);
518 const T t1 = tau*v1;
519 const T v2 = v(2);
520 const T t2 = tau*v2;
521 const T v3 = v(3);
522 const T t3 = tau*v3;
523 const T v4 = v(4);
524 const T t4 = tau*v4;
525 const T v5 = v(5);
526 const T t5 = tau*v5;
527 const T v6 = v(6);
528 const T t6 = tau*v6;
529 const T v7 = v(7);
530 const T t7 = tau*v7;
531 const T v8 = v(8);
532 const T t8 = tau*v8;
533 for (IndexType i=1; i<=m; ++i) {
534 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4)
535 + v5*C(i,5) + v6*C(i,6) + v7*C(i,7) + v8*C(i,8);
536 C(i,1) -= sum*t1;
537 C(i,2) -= sum*t2;
538 C(i,3) -= sum*t3;
539 C(i,4) -= sum*t4;
540 C(i,5) -= sum*t5;
541 C(i,6) -= sum*t6;
542 C(i,7) -= sum*t7;
543 C(i,8) -= sum*t8;
544 }
545 }
546 return;
547 //
548 // Special code for 9 x 9 Householder
549 //
550 case 9:
551 {
552 const T v1 = v(1);
553 const T t1 = tau*v1;
554 const T v2 = v(2);
555 const T t2 = tau*v2;
556 const T v3 = v(3);
557 const T t3 = tau*v3;
558 const T v4 = v(4);
559 const T t4 = tau*v4;
560 const T v5 = v(5);
561 const T t5 = tau*v5;
562 const T v6 = v(6);
563 const T t6 = tau*v6;
564 const T v7 = v(7);
565 const T t7 = tau*v7;
566 const T v8 = v(8);
567 const T t8 = tau*v8;
568 const T v9 = v(9);
569 const T t9 = tau*v9;
570 for (IndexType i=1; i<=m; ++i) {
571 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4)
572 + v5*C(i,5) + v6*C(i,6) + v7*C(i,7) + v8*C(i,8)
573 + v9*C(i,9);
574 C(i,1) -= sum*t1;
575 C(i,2) -= sum*t2;
576 C(i,3) -= sum*t3;
577 C(i,4) -= sum*t4;
578 C(i,5) -= sum*t5;
579 C(i,6) -= sum*t6;
580 C(i,7) -= sum*t7;
581 C(i,8) -= sum*t8;
582 C(i,9) -= sum*t9;
583 }
584 }
585 return;
586 //
587 // Special code for 10 x 10 Householder
588 //
589 case 10:
590 {
591 const T v1 = v(1);
592 const T t1 = tau*v1;
593 const T v2 = v(2);
594 const T t2 = tau*v2;
595 const T v3 = v(3);
596 const T t3 = tau*v3;
597 const T v4 = v(4);
598 const T t4 = tau*v4;
599 const T v5 = v(5);
600 const T t5 = tau*v5;
601 const T v6 = v(6);
602 const T t6 = tau*v6;
603 const T v7 = v(7);
604 const T t7 = tau*v7;
605 const T v8 = v(8);
606 const T t8 = tau*v8;
607 const T v9 = v(9);
608 const T t9 = tau*v9;
609 const T v10 = v(10);
610 const T t10 = tau*v10;
611 for (IndexType i=1; i<=m; ++i) {
612 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4)
613 + v5*C(i,5) + v6*C(i,6) + v7*C(i,7) + v8*C(i,8)
614 + v9*C(i,9) + v10*C(i,10);
615 C(i,1) -= sum*t1;
616 C(i,2) -= sum*t2;
617 C(i,3) -= sum*t3;
618 C(i,4) -= sum*t4;
619 C(i,5) -= sum*t5;
620 C(i,6) -= sum*t6;
621 C(i,7) -= sum*t7;
622 C(i,8) -= sum*t8;
623 C(i,9) -= sum*t9;
624 C(i,10) -= sum*t10;
625 }
626 }
627 return;
628 //
629 // Code for general N
630 //
631 default:
632 larf(side, v, tau, C, work);
633 return;
634 }
635 }
636 }
637
638 //== interface for native lapack ===============================================
639
640 #ifdef CHECK_CXXLAPACK
641
642 template <typename VV, typename TAU, typename MC, typename VWORK>
643 void
644 larfx_native(Side side, const DenseVector<VV> &v, const TAU &tau,
645 GeMatrix<MC> &C, DenseVector<VWORK> &work)
646 {
647 typedef typename GeMatrix<MC>::ElementType T;
648
649 const char SIDE = getF77LapackChar<Side>(side);
650 const INTEGER M = C.numRows();
651 const INTEGER N = C.numCols();
652 const INTEGER LDC = C.leadingDimension();
653
654 if (IsSame<T,DOUBLE>::value) {
655 LAPACK_IMPL(dlarfx)(&SIDE,
656 &M,
657 &N,
658 v.data(),
659 &tau,
660 C.data(),
661 &LDC,
662 work.data());
663 } else {
664 ASSERT(0);
665 }
666 }
667
668 #endif // CHECK_CXXLAPACK
669
670 //== public interface ==========================================================
671
672 template <typename VV, typename TAU, typename MC, typename VWORK>
673 void
674 larfx(Side side, const DenseVector<VV> &v, const TAU &tau,
675 GeMatrix<MC> &C, DenseVector<VWORK> &work)
676 {
677 LAPACK_DEBUG_OUT("larfx");
678
679 //
680 // Test the input parameters
681 //
682 # ifndef NDEBUG
683 typedef typename GeMatrix<MC>::IndexType IndexType;
684
685 ASSERT(v.inc()>0);
686 ASSERT(v.firstIndex()==1);
687 ASSERT(C.firstRow()==1);
688 ASSERT(C.firstCol()==1);
689
690 const IndexType m = C.numRows();
691 const IndexType n = C.numCols();
692
693 if (side==Left) {
694 ASSERT(v.length()==m);
695 ASSERT(work.length()==n);
696 } else {
697 ASSERT(v.length()==n);
698 ASSERT(work.length()==m);
699 }
700
701 # endif
702
703 # ifdef CHECK_CXXLAPACK
704 //
705 // Make copies of output arguments
706 //
707 typename GeMatrix<MC>::NoView C_org = C;
708 typename DenseVector<VV>::NoView work_org = work;
709 # endif
710
711 //
712 // Call implementation
713 //
714 larfx_generic(side, v, tau, C, work);
715
716 # ifdef CHECK_CXXLAPACK
717 //
718 // Make copies of results computed by the generic implementation
719 //
720 typename GeMatrix<MC>::NoView C_generic = C;
721 typename DenseVector<VV>::NoView work_generic = work;
722
723 //
724 // restore output arguments
725 //
726 C = C_org;
727 work = work_org;
728
729 //
730 // Compare generic results with results from the native implementation
731 //
732 larfx_native(side, v, tau, C, work);
733
734 bool failed = false;
735
736 if (! isIdentical(C_generic, C, "C_generic", "C")) {
737 std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
738 std::cerr << "F77LAPACK: C = " << C << std::endl;
739 failed = true;
740 }
741
742 if (! isIdentical(work_generic, work, "work_generic", "work")) {
743 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
744 std::cerr << "F77LAPACK: work = " << work << std::endl;
745 failed = true;
746 }
747
748 if (failed) {
749 ASSERT(0);
750 }
751 # endif
752 }
753
754 //-- forwarding ----------------------------------------------------------------
755 template <typename VV, typename TAU, typename MC, typename VWORK>
756 void
757 larfx(Side side, const VV &v, const TAU &tau, MC &&C, VWORK &&work)
758 {
759 larfx(side, v, tau, C, work);
760 }
761
762 } } // namespace lapack, flens
763
764 #endif // FLENS_LAPACK_AUX_LARFX_TCC
2 * Copyright (c) 2011, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /* Based on
34 SUBROUTINE DLARFX( SIDE, M, N, V, TAU, C, LDC, WORK )
35 IMPLICIT NONE
36 *
37 * -- LAPACK auxiliary routine (version 3.3.1) --
38 * -- LAPACK is a software package provided by Univ. of Tennessee, --
39 * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
40 * -- April 2011 --
41 */
42
43 #ifndef FLENS_LAPACK_AUX_LARFX_TCC
44 #define FLENS_LAPACK_AUX_LARFX_TCC 1
45
46 #include <flens/blas/blas.h>
47 #include <flens/lapack/lapack.h>
48
49 namespace flens { namespace lapack {
50
51 //== generic lapack implementation =============================================
52
53 template <typename VV, typename TAU, typename MC, typename VWORK>
54 void
55 larfx_generic(Side side, const DenseVector<VV> &v, const TAU &tau,
56 GeMatrix<MC> &C, DenseVector<VWORK> &work)
57 {
58 typedef typename GeMatrix<MC>::IndexType IndexType;
59 typedef typename GeMatrix<MC>::ElementType T;
60
61 const IndexType m = C.numRows();
62 const IndexType n = C.numCols();
63
64 const T Zero(0), One(1);
65
66 if (tau==Zero) {
67 return;
68 }
69 if (side==Left) {
70 //
71 // Form H * C, where H has order m.
72 //
73 switch(m) {
74 //
75 // Special code for 1 x 1 Householder
76 //
77 case 1:
78 {
79 const T tmp = One - tau*v(1)*v(1);
80 for (IndexType j=1; j<=n; ++j) {
81 C(1,j) *= tmp;
82 }
83 }
84 return;
85 //
86 // Special code for 2 x 2 Householder
87 //
88 case 2:
89 {
90 const T v1 = v(1);
91 const T t1 = tau*v1;
92 const T v2 = v(2);
93 const T t2 = tau*v2;
94 for (IndexType j=1; j<=n; ++j) {
95 const T sum = v1*C(1,j) + v2*C(2,j);
96 C(1,j) -= sum*t1;
97 C(2,j) -= sum*t2;
98 }
99 }
100 return;
101 //
102 // Special code for 3 x 3 Householder
103 //
104 case 3:
105 {
106 const T v1 = v(1);
107 const T t1 = tau*v1;
108 const T v2 = v(2);
109 const T t2 = tau*v2;
110 const T v3 = v(3);
111 const T t3 = tau*v3;
112 for (IndexType j=1; j<=n; ++j) {
113 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j);
114 C(1,j) -= sum*t1;
115 C(2,j) -= sum*t2;
116 C(3,j) -= sum*t3;
117 }
118 }
119 return;
120 //
121 // Special code for 4 x 4 Householder
122 //
123 case 4:
124 {
125 const T v1 = v(1);
126 const T t1 = tau*v1;
127 const T v2 = v(2);
128 const T t2 = tau*v2;
129 const T v3 = v(3);
130 const T t3 = tau*v3;
131 const T v4 = v(4);
132 const T t4 = tau*v4;
133 for (IndexType j=1; j<=n; ++j) {
134 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j);
135 C(1,j) -= sum*t1;
136 C(2,j) -= sum*t2;
137 C(3,j) -= sum*t3;
138 C(4,j) -= sum*t4;
139 }
140 }
141 return;
142 //
143 // Special code for 5 x 5 Householder
144 //
145 case 5:
146 {
147 const T v1 = v(1);
148 const T t1 = tau*v1;
149 const T v2 = v(2);
150 const T t2 = tau*v2;
151 const T v3 = v(3);
152 const T t3 = tau*v3;
153 const T v4 = v(4);
154 const T t4 = tau*v4;
155 const T v5 = v(5);
156 const T t5 = tau*v5;
157 for (IndexType j=1; j<=n; ++j) {
158 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j)
159 + v5*C(5,j);
160 C(1,j) -= sum*t1;
161 C(2,j) -= sum*t2;
162 C(3,j) -= sum*t3;
163 C(4,j) -= sum*t4;
164 C(5,j) -= sum*t5;
165 }
166 }
167 return;
168 //
169 // Special code for 6 x 6 Householder
170 //
171 case 6:
172 {
173 const T v1 = v(1);
174 const T t1 = tau*v1;
175 const T v2 = v(2);
176 const T t2 = tau*v2;
177 const T v3 = v(3);
178 const T t3 = tau*v3;
179 const T v4 = v(4);
180 const T t4 = tau*v4;
181 const T v5 = v(5);
182 const T t5 = tau*v5;
183 const T v6 = v(6);
184 const T t6 = tau*v6;
185 for (IndexType j=1; j<=n; ++j) {
186 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j)
187 + v5*C(5,j) + v6*C(6,j);
188 C(1,j) -= sum*t1;
189 C(2,j) -= sum*t2;
190 C(3,j) -= sum*t3;
191 C(4,j) -= sum*t4;
192 C(5,j) -= sum*t5;
193 C(6,j) -= sum*t6;
194 }
195 }
196 return;
197 //
198 // Special code for 7 x 7 Householder
199 //
200 case 7:
201 {
202 const T v1 = v(1);
203 const T t1 = tau*v1;
204 const T v2 = v(2);
205 const T t2 = tau*v2;
206 const T v3 = v(3);
207 const T t3 = tau*v3;
208 const T v4 = v(4);
209 const T t4 = tau*v4;
210 const T v5 = v(5);
211 const T t5 = tau*v5;
212 const T v6 = v(6);
213 const T t6 = tau*v6;
214 const T v7 = v(7);
215 const T t7 = tau*v7;
216 for (IndexType j=1; j<=n; ++j) {
217 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j)
218 + v5*C(5,j) + v6*C(6,j) + v7*C(7,j);
219 C(1,j) -= sum*t1;
220 C(2,j) -= sum*t2;
221 C(3,j) -= sum*t3;
222 C(4,j) -= sum*t4;
223 C(5,j) -= sum*t5;
224 C(6,j) -= sum*t6;
225 C(7,j) -= sum*t7;
226 }
227 }
228 return;
229 //
230 // Special code for 8 x 8 Householder
231 //
232 case 8:
233 {
234 const T v1 = v(1);
235 const T t1 = tau*v1;
236 const T v2 = v(2);
237 const T t2 = tau*v2;
238 const T v3 = v(3);
239 const T t3 = tau*v3;
240 const T v4 = v(4);
241 const T t4 = tau*v4;
242 const T v5 = v(5);
243 const T t5 = tau*v5;
244 const T v6 = v(6);
245 const T t6 = tau*v6;
246 const T v7 = v(7);
247 const T t7 = tau*v7;
248 const T v8 = v(8);
249 const T t8 = tau*v8;
250 for (IndexType j=1; j<=n; ++j) {
251 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j)
252 + v5*C(5,j) + v6*C(6,j) + v7*C(7,j) + v8*C(8,j);
253 C(1,j) -= sum*t1;
254 C(2,j) -= sum*t2;
255 C(3,j) -= sum*t3;
256 C(4,j) -= sum*t4;
257 C(5,j) -= sum*t5;
258 C(6,j) -= sum*t6;
259 C(7,j) -= sum*t7;
260 C(8,j) -= sum*t8;
261 }
262 }
263 return;
264 //
265 // Special code for 9 x 9 Householder
266 //
267 case 9:
268 {
269 const T v1 = v(1);
270 const T t1 = tau*v1;
271 const T v2 = v(2);
272 const T t2 = tau*v2;
273 const T v3 = v(3);
274 const T t3 = tau*v3;
275 const T v4 = v(4);
276 const T t4 = tau*v4;
277 const T v5 = v(5);
278 const T t5 = tau*v5;
279 const T v6 = v(6);
280 const T t6 = tau*v6;
281 const T v7 = v(7);
282 const T t7 = tau*v7;
283 const T v8 = v(8);
284 const T t8 = tau*v8;
285 const T v9 = v(9);
286 const T t9 = tau*v9;
287 for (IndexType j=1; j<=n; ++j) {
288 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j)
289 + v5*C(5,j) + v6*C(6,j) + v7*C(7,j) + v8*C(8,j)
290 + v9*C(9,j);
291 C(1,j) -= sum*t1;
292 C(2,j) -= sum*t2;
293 C(3,j) -= sum*t3;
294 C(4,j) -= sum*t4;
295 C(5,j) -= sum*t5;
296 C(6,j) -= sum*t6;
297 C(7,j) -= sum*t7;
298 C(8,j) -= sum*t8;
299 C(9,j) -= sum*t9;
300 }
301 }
302 return;
303 //
304 // Special code for 10 x 10 Householder
305 //
306 case 10:
307 {
308 const T v1 = v(1);
309 const T t1 = tau*v1;
310 const T v2 = v(2);
311 const T t2 = tau*v2;
312 const T v3 = v(3);
313 const T t3 = tau*v3;
314 const T v4 = v(4);
315 const T t4 = tau*v4;
316 const T v5 = v(5);
317 const T t5 = tau*v5;
318 const T v6 = v(6);
319 const T t6 = tau*v6;
320 const T v7 = v(7);
321 const T t7 = tau*v7;
322 const T v8 = v(8);
323 const T t8 = tau*v8;
324 const T v9 = v(9);
325 const T t9 = tau*v9;
326 const T v10 = v(10);
327 const T t10 = tau*v10;
328 for (IndexType j=1; j<=n; ++j) {
329 const T sum = v1*C(1,j) + v2*C(2,j) + v3*C(3,j) + v4*C(4,j)
330 + v5*C(5,j) + v6*C(6,j) + v7*C(7,j) + v8*C(8,j)
331 + v9*C(9,j) + v10*C(10,j);
332 C(1,j) -= sum*t1;
333 C(2,j) -= sum*t2;
334 C(3,j) -= sum*t3;
335 C(4,j) -= sum*t4;
336 C(5,j) -= sum*t5;
337 C(6,j) -= sum*t6;
338 C(7,j) -= sum*t7;
339 C(8,j) -= sum*t8;
340 C(9,j) -= sum*t9;
341 C(10,j) -= sum*t10;
342 }
343 }
344 return;
345 //
346 // Code for general M
347 //
348 default:
349 larf(side, v, tau, C, work);
350 return;
351 }
352 } else {
353 //
354 // Form C * H, where H has order n.
355 //
356 switch(n) {
357 //
358 // Special code for 1 x 1 Householder
359 //
360 case 1:
361 {
362 const T tmp = One - tau*v(1)*v(1);
363 for (IndexType i=1; i<=m; ++i) {
364 C(i,1) *= tmp;
365 }
366 }
367 return;
368 //
369 // Special code for 2 x 2 Householder
370 //
371 case 2:
372 {
373 const T v1 = v(1);
374 const T t1 = tau*v1;
375 const T v2 = v(2);
376 const T t2 = tau*v2;
377 for (IndexType i=1; i<=m; ++i) {
378 const T sum = v1*C(i,1) + v2*C(i,2);
379 C(i,1) -= sum*t1;
380 C(i,2) -= sum*t2;
381 }
382 }
383 return;
384 //
385 // Special code for 3 x 3 Householder
386 //
387 case 3:
388 {
389 const T v1 = v(1);
390 const T t1 = tau*v1;
391 const T v2 = v(2);
392 const T t2 = tau*v2;
393 const T v3 = v(3);
394 const T t3 = tau*v3;
395 for (IndexType i=1; i<=m; ++i) {
396 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3);
397 C(i,1) -= sum*t1;
398 C(i,2) -= sum*t2;
399 C(i,3) -= sum*t3;
400 }
401 }
402 return;
403 //
404 // Special code for 4 x 4 Householder
405 //
406 case 4:
407 {
408 const T v1 = v(1);
409 const T t1 = tau*v1;
410 const T v2 = v(2);
411 const T t2 = tau*v2;
412 const T v3 = v(3);
413 const T t3 = tau*v3;
414 const T v4 = v(4);
415 const T t4 = tau*v4;
416 for (IndexType i=1; i<=m; ++i) {
417 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4);
418 C(i,1) -= sum*t1;
419 C(i,2) -= sum*t2;
420 C(i,3) -= sum*t3;
421 C(i,4) -= sum*t4;
422 }
423 }
424 return;
425 //
426 // Special code for 5 x 5 Householder
427 //
428 case 5:
429 {
430 const T v1 = v(1);
431 const T t1 = tau*v1;
432 const T v2 = v(2);
433 const T t2 = tau*v2;
434 const T v3 = v(3);
435 const T t3 = tau*v3;
436 const T v4 = v(4);
437 const T t4 = tau*v4;
438 const T v5 = v(5);
439 const T t5 = tau*v5;
440 for (IndexType i=1; i<=m; ++i) {
441 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4)
442 + v5*C(i,5);
443 C(i,1) -= sum*t1;
444 C(i,2) -= sum*t2;
445 C(i,3) -= sum*t3;
446 C(i,4) -= sum*t4;
447 C(i,5) -= sum*t5;
448 }
449 }
450 return;
451 //
452 // Special code for 6 x 6 Householder
453 //
454 case 6:
455 {
456 const T v1 = v(1);
457 const T t1 = tau*v1;
458 const T v2 = v(2);
459 const T t2 = tau*v2;
460 const T v3 = v(3);
461 const T t3 = tau*v3;
462 const T v4 = v(4);
463 const T t4 = tau*v4;
464 const T v5 = v(5);
465 const T t5 = tau*v5;
466 const T v6 = v(6);
467 const T t6 = tau*v6;
468 for (IndexType i=1; i<=m; ++i) {
469 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4)
470 + v5*C(i,5) + v6*C(i,6);
471 C(i,1) -= sum*t1;
472 C(i,2) -= sum*t2;
473 C(i,3) -= sum*t3;
474 C(i,4) -= sum*t4;
475 C(i,5) -= sum*t5;
476 C(i,6) -= sum*t6;
477 }
478 }
479 return;
480 //
481 // Special code for 7 x 7 Householder
482 //
483 case 7:
484 {
485 const T v1 = v(1);
486 const T t1 = tau*v1;
487 const T v2 = v(2);
488 const T t2 = tau*v2;
489 const T v3 = v(3);
490 const T t3 = tau*v3;
491 const T v4 = v(4);
492 const T t4 = tau*v4;
493 const T v5 = v(5);
494 const T t5 = tau*v5;
495 const T v6 = v(6);
496 const T t6 = tau*v6;
497 const T v7 = v(7);
498 const T t7 = tau*v7;
499 for (IndexType i=1; i<=m; ++i) {
500 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4)
501 + v5*C(i,5) + v6*C(i,6) + v7*C(i,7);
502 C(i,1) -= sum*t1;
503 C(i,2) -= sum*t2;
504 C(i,3) -= sum*t3;
505 C(i,4) -= sum*t4;
506 C(i,5) -= sum*t5;
507 C(i,6) -= sum*t6;
508 C(i,7) -= sum*t7;
509 }
510 }
511 return;
512 //
513 // Special code for 8 x 8 Householder
514 //
515 case 8:
516 {
517 const T v1 = v(1);
518 const T t1 = tau*v1;
519 const T v2 = v(2);
520 const T t2 = tau*v2;
521 const T v3 = v(3);
522 const T t3 = tau*v3;
523 const T v4 = v(4);
524 const T t4 = tau*v4;
525 const T v5 = v(5);
526 const T t5 = tau*v5;
527 const T v6 = v(6);
528 const T t6 = tau*v6;
529 const T v7 = v(7);
530 const T t7 = tau*v7;
531 const T v8 = v(8);
532 const T t8 = tau*v8;
533 for (IndexType i=1; i<=m; ++i) {
534 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4)
535 + v5*C(i,5) + v6*C(i,6) + v7*C(i,7) + v8*C(i,8);
536 C(i,1) -= sum*t1;
537 C(i,2) -= sum*t2;
538 C(i,3) -= sum*t3;
539 C(i,4) -= sum*t4;
540 C(i,5) -= sum*t5;
541 C(i,6) -= sum*t6;
542 C(i,7) -= sum*t7;
543 C(i,8) -= sum*t8;
544 }
545 }
546 return;
547 //
548 // Special code for 9 x 9 Householder
549 //
550 case 9:
551 {
552 const T v1 = v(1);
553 const T t1 = tau*v1;
554 const T v2 = v(2);
555 const T t2 = tau*v2;
556 const T v3 = v(3);
557 const T t3 = tau*v3;
558 const T v4 = v(4);
559 const T t4 = tau*v4;
560 const T v5 = v(5);
561 const T t5 = tau*v5;
562 const T v6 = v(6);
563 const T t6 = tau*v6;
564 const T v7 = v(7);
565 const T t7 = tau*v7;
566 const T v8 = v(8);
567 const T t8 = tau*v8;
568 const T v9 = v(9);
569 const T t9 = tau*v9;
570 for (IndexType i=1; i<=m; ++i) {
571 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4)
572 + v5*C(i,5) + v6*C(i,6) + v7*C(i,7) + v8*C(i,8)
573 + v9*C(i,9);
574 C(i,1) -= sum*t1;
575 C(i,2) -= sum*t2;
576 C(i,3) -= sum*t3;
577 C(i,4) -= sum*t4;
578 C(i,5) -= sum*t5;
579 C(i,6) -= sum*t6;
580 C(i,7) -= sum*t7;
581 C(i,8) -= sum*t8;
582 C(i,9) -= sum*t9;
583 }
584 }
585 return;
586 //
587 // Special code for 10 x 10 Householder
588 //
589 case 10:
590 {
591 const T v1 = v(1);
592 const T t1 = tau*v1;
593 const T v2 = v(2);
594 const T t2 = tau*v2;
595 const T v3 = v(3);
596 const T t3 = tau*v3;
597 const T v4 = v(4);
598 const T t4 = tau*v4;
599 const T v5 = v(5);
600 const T t5 = tau*v5;
601 const T v6 = v(6);
602 const T t6 = tau*v6;
603 const T v7 = v(7);
604 const T t7 = tau*v7;
605 const T v8 = v(8);
606 const T t8 = tau*v8;
607 const T v9 = v(9);
608 const T t9 = tau*v9;
609 const T v10 = v(10);
610 const T t10 = tau*v10;
611 for (IndexType i=1; i<=m; ++i) {
612 const T sum = v1*C(i,1) + v2*C(i,2) + v3*C(i,3) + v4*C(i,4)
613 + v5*C(i,5) + v6*C(i,6) + v7*C(i,7) + v8*C(i,8)
614 + v9*C(i,9) + v10*C(i,10);
615 C(i,1) -= sum*t1;
616 C(i,2) -= sum*t2;
617 C(i,3) -= sum*t3;
618 C(i,4) -= sum*t4;
619 C(i,5) -= sum*t5;
620 C(i,6) -= sum*t6;
621 C(i,7) -= sum*t7;
622 C(i,8) -= sum*t8;
623 C(i,9) -= sum*t9;
624 C(i,10) -= sum*t10;
625 }
626 }
627 return;
628 //
629 // Code for general N
630 //
631 default:
632 larf(side, v, tau, C, work);
633 return;
634 }
635 }
636 }
637
638 //== interface for native lapack ===============================================
639
640 #ifdef CHECK_CXXLAPACK
641
642 template <typename VV, typename TAU, typename MC, typename VWORK>
643 void
644 larfx_native(Side side, const DenseVector<VV> &v, const TAU &tau,
645 GeMatrix<MC> &C, DenseVector<VWORK> &work)
646 {
647 typedef typename GeMatrix<MC>::ElementType T;
648
649 const char SIDE = getF77LapackChar<Side>(side);
650 const INTEGER M = C.numRows();
651 const INTEGER N = C.numCols();
652 const INTEGER LDC = C.leadingDimension();
653
654 if (IsSame<T,DOUBLE>::value) {
655 LAPACK_IMPL(dlarfx)(&SIDE,
656 &M,
657 &N,
658 v.data(),
659 &tau,
660 C.data(),
661 &LDC,
662 work.data());
663 } else {
664 ASSERT(0);
665 }
666 }
667
668 #endif // CHECK_CXXLAPACK
669
670 //== public interface ==========================================================
671
672 template <typename VV, typename TAU, typename MC, typename VWORK>
673 void
674 larfx(Side side, const DenseVector<VV> &v, const TAU &tau,
675 GeMatrix<MC> &C, DenseVector<VWORK> &work)
676 {
677 LAPACK_DEBUG_OUT("larfx");
678
679 //
680 // Test the input parameters
681 //
682 # ifndef NDEBUG
683 typedef typename GeMatrix<MC>::IndexType IndexType;
684
685 ASSERT(v.inc()>0);
686 ASSERT(v.firstIndex()==1);
687 ASSERT(C.firstRow()==1);
688 ASSERT(C.firstCol()==1);
689
690 const IndexType m = C.numRows();
691 const IndexType n = C.numCols();
692
693 if (side==Left) {
694 ASSERT(v.length()==m);
695 ASSERT(work.length()==n);
696 } else {
697 ASSERT(v.length()==n);
698 ASSERT(work.length()==m);
699 }
700
701 # endif
702
703 # ifdef CHECK_CXXLAPACK
704 //
705 // Make copies of output arguments
706 //
707 typename GeMatrix<MC>::NoView C_org = C;
708 typename DenseVector<VV>::NoView work_org = work;
709 # endif
710
711 //
712 // Call implementation
713 //
714 larfx_generic(side, v, tau, C, work);
715
716 # ifdef CHECK_CXXLAPACK
717 //
718 // Make copies of results computed by the generic implementation
719 //
720 typename GeMatrix<MC>::NoView C_generic = C;
721 typename DenseVector<VV>::NoView work_generic = work;
722
723 //
724 // restore output arguments
725 //
726 C = C_org;
727 work = work_org;
728
729 //
730 // Compare generic results with results from the native implementation
731 //
732 larfx_native(side, v, tau, C, work);
733
734 bool failed = false;
735
736 if (! isIdentical(C_generic, C, "C_generic", "C")) {
737 std::cerr << "CXXLAPACK: C_generic = " << C_generic << std::endl;
738 std::cerr << "F77LAPACK: C = " << C << std::endl;
739 failed = true;
740 }
741
742 if (! isIdentical(work_generic, work, "work_generic", "work")) {
743 std::cerr << "CXXLAPACK: work_generic = " << work_generic << std::endl;
744 std::cerr << "F77LAPACK: work = " << work << std::endl;
745 failed = true;
746 }
747
748 if (failed) {
749 ASSERT(0);
750 }
751 # endif
752 }
753
754 //-- forwarding ----------------------------------------------------------------
755 template <typename VV, typename TAU, typename MC, typename VWORK>
756 void
757 larfx(Side side, const VV &v, const TAU &tau, MC &&C, VWORK &&work)
758 {
759 larfx(side, v, tau, C, work);
760 }
761
762 } } // namespace lapack, flens
763
764 #endif // FLENS_LAPACK_AUX_LARFX_TCC