1 /*
2 * Copyright (c) 2007, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef FLENS_BLAS_CLOSURES_COPY_TCC
34 #define FLENS_BLAS_CLOSURES_COPY_TCC 1
35
36 #include <flens/aux/aux.h>
37 #include <flens/blas/closures/debugclosure.h>
38 #include <flens/blas/closures/mmswitch.h>
39 #include <flens/blas/closures/mvswitch.h>
40 #include <flens/blas/closures/result.h>
41 #include <flens/blas/closures/residual.h>
42 #include <flens/blas/level1/level1.h>
43 #include <flens/blas/level2/level2.h>
44 #include <flens/typedefs.h>
45
46 #ifdef FLENS_DEBUG_CLOSURES
47 # include <flens/blas/blaslogon.h>
48 #else
49 # include <flens/blas/blaslogoff.h>
50 #endif
51
52 namespace flens { namespace blas {
53
54 //
55 //== vector closures ===========================================================
56 //
57
58 //
59 // Auxiliary function for
60 // y = x1 + x2 (alpha= 1)
61 // or y = x1 - x2 (alpha=-1)
62 //
63 template <typename VX1, typename ALPHA, typename VX2, typename VY>
64 void
65 copySum(const VX1 &x1, const ALPHA &alpha, const VX2 &x2, VY &y)
66 {
67 ASSERT(alpha==ALPHA(1) || alpha==ALPHA(-1));
68 //
69 // In debug-closure-mode we check if x2 has to stored in a temporary.
70 // Otherwise an assertion gets triggered if x2 and y are identical of if x2
71 // is a closure that contains y.
72 //
73 # ifdef FLENS_DEBUG_CLOSURES
74 bool tmpNeeded = DebugClosure::search(x2, y);
75 typename Result<VX2>::NoView tmp;
76
77 if (tmpNeeded) {
78 tmp = x2;
79 FLENS_BLASLOG_TMP_ADD(tmp);
80 }
81 # else
82 ASSERT(!DebugClosure::search(x2, y));
83 # endif
84
85 //
86 // y = x1
87 //
88 blas::copy(x1, y);
89 //
90 // y += x2 or y -= x2
91 //
92 # ifdef FLENS_DEBUG_CLOSURES
93 if (tmpNeeded) {
94 blas::axpy(alpha, tmp, y);
95 FLENS_BLASLOG_TMP_REMOVE(tmp, x2);
96 } else {
97 blas::axpy(alpha, x2, y);
98 }
99 # else
100 blas::axpy(alpha, x2, y);
101 # endif
102
103 }
104
105 //------------------------------------------------------------------------------
106 //
107 // y = x1 + x2
108 //
109 template <typename VL, typename VR, typename VY>
110 typename RestrictTo<!ClosureType<OpAdd, VL, VR>::isMatrixVectorProduct,
111 void>::Type
112 copy(const VectorClosure<OpAdd, VL, VR> &x, Vector<VY> &y)
113 {
114 FLENS_BLASLOG_BEGIN_COPY(x, y);
115
116 typedef typename VY::Impl::ElementType T;
117 const T One(1);
118 copySum(x.left(), One, x.right(), y.impl());
119
120 FLENS_BLASLOG_END;
121 }
122
123 //------------------------------------------------------------------------------
124 //
125 // y = x1 - x2
126 //
127 template <typename VL, typename VR, typename VY>
128 typename RestrictTo<!ClosureType<OpSub, VL, VR>::isResidual,
129 void>::Type
130 copy(const VectorClosure<OpSub, VL, VR> &x, Vector<VY> &y)
131 {
132 FLENS_BLASLOG_BEGIN_COPY(x, y);
133
134 typedef typename VY::Impl::ElementType T;
135 const T MinusOne(-1);
136 copySum(x.left(), MinusOne, x.right(), y.impl());
137
138 FLENS_BLASLOG_END;
139 }
140
141 //------------------------------------------------------------------------------
142 //
143 // y = alpha*x
144 //
145 // We evalute this with a scalSwitch
146 // case 1: x is no closure
147 // case 2: x is a closure
148
149 //
150 // case 1: x is no closure
151 //
152 template <typename ALPHA, typename VX, typename VY>
153 void
154 scalSwitch(const ALPHA &alpha, const Vector<VX> &x, Vector<VY> &y)
155 {
156 using namespace DEBUGCLOSURE;
157
158 if (identical(x.impl(), y.impl())) {
159 scal(alpha, y.impl());
160 } else {
161 //
162 // Zero should have the same type as elements of y so that CBLAS
163 // gets called if possible.
164 //
165 typename Vector<VX>::Impl::ElementType Zero(0);
166 scal(Zero, y.impl());
167
168 axpy(alpha, x.impl(), y.impl());
169 }
170 }
171
172 //
173 // case 2: x is a closure
174 //
175 template <typename ALPHA, typename Op, typename L, typename R, typename VY>
176 void
177 scalSwitch(const ALPHA &alpha, const VectorClosure<Op, L, R> &x, Vector<VY> &y)
178 {
179 copy(x, y.impl());
180 scal(alpha, y.impl());
181 }
182
183 //
184 // entry point for switch
185 //
186 template <typename T, typename VX, typename VY>
187 void
188 copy(const VectorClosure<OpMult, ScalarValue<T>, VX> &x, Vector<VY> &y)
189 {
190 FLENS_BLASLOG_BEGIN_COPY(x, y);
191 scalSwitch(x.left().value(), x.right(), y.impl());
192 FLENS_BLASLOG_END;
193 }
194
195
196 //------------------------------------------------------------------------------
197 //
198 // y = x/alpha
199 //
200 // We evalute this with a rscalSwitch
201 // case 1: x is no closure
202 // case 2: x is a closure
203
204 //
205 // case 1: x is no closure
206 //
207 template <typename ALPHA, typename VX, typename VY>
208 void
209 rscalSwitch(const ALPHA &alpha, const Vector<VX> &x, Vector<VY> &y)
210 {
211 using namespace DEBUGCLOSURE;
212
213 if (identical(x.impl(), y.impl())) {
214 rscal(alpha, y.impl());
215 } else {
216 //
217 // Zero should have the same type as elements of y so that CBLAS
218 // gets called if possible.
219 //
220 typename Vector<VX>::Impl::ElementType Zero(0);
221 scal(Zero, y.impl());
222
223 raxpy(alpha, x.impl(), y.impl());
224 }
225 }
226
227 //
228 // case 2: x is a closure
229 //
230 template <typename ALPHA, typename Op, typename L, typename R, typename VY>
231 void
232 rscalSwitch(const ALPHA &alpha, const VectorClosure<Op, L, R> &x, Vector<VY> &y)
233 {
234 copy(x, y.impl());
235 rscal(alpha, y.impl());
236 }
237
238 //
239 // entry point for switch
240 //
241 template <typename VX, typename T, typename VY>
242 void
243 copy(const VectorClosure<OpDiv, VX, ScalarValue<T> > &x, Vector<VY> &y)
244 {
245 FLENS_BLASLOG_BEGIN_COPY(x, y);
246 rscalSwitch(x.right().value(), x.left(), y.impl());
247 FLENS_BLASLOG_END;
248 }
249
250 //------------------------------------------------------------------------------
251 //
252 // y = conjugate(x)
253 //
254 template <typename VX, typename VY>
255 void
256 copy(const VectorClosureOpConj<VX> &, Vector<VY> &)
257 {
258 ERROR_MSG("Lehn: Will be implemented on demand");
259 ASSERT(0);
260 }
261
262 //------------------------------------------------------------------------------
263 //
264 // y = beta*y + alpha*op(A)*x
265 //
266 template <typename VL, typename MVR, typename VY>
267 typename RestrictTo<ClosureType<OpAdd, VL, MVR>::isMatrixVectorProduct,
268 void>::Type
269 copy(const VectorClosure<OpAdd, VL, MVR> &ypAx, Vector<VY> &y)
270 {
271 FLENS_BLASLOG_BEGIN_COPY(ypAx, y);
272
273 using namespace DEBUGCLOSURE;
274 //
275 // check if y form rhs and lhs are identical
276 //
277 typedef typename PruneScaling<VL>::Remainder RVL;
278
279 const RVL &_y = PruneScaling<VL>::getRemainder(ypAx.left());
280
281 if (!identical(_y, y.impl())) {
282 typedef typename VY::Impl::ElementType TY;
283 const TY One(1);
284 copySum(ypAx.left(), One, ypAx.right(), y.impl());
285 FLENS_BLASLOG_END;
286 return;
287 }
288 //
289 // get factor beta
290 //
291 typedef typename PruneScaling<VL>::ScalingType SVL;
292 const SVL &beta = PruneScaling<VL>::getFactor(ypAx.left());
293 //
294 // Rest gets done by the mv switch
295 //
296 typedef typename MVR::Left::ElementType TA;
297 const auto &A = ypAx.right().left();
298 const auto &x = ypAx.right().right();
299
300 mvSwitch(NoTrans, TA(1), A, x, beta, y.impl());
301
302 FLENS_BLASLOG_END;
303 }
304
305 //------------------------------------------------------------------------------
306 //
307 // y = A*x
308 //
309 template <typename ML, typename VR, typename VY>
310 typename RestrictTo<ClosureType<OpMult, ML, VR>::isMatrixVectorProduct,
311 void>::Type
312 copy(const VectorClosure<OpMult, ML, VR> &Ax, Vector<VY> &y)
313 {
314 FLENS_BLASLOG_BEGIN_COPY(Ax, y);
315
316 //
317 // Call mv switch
318 //
319 typedef typename ML::Impl::ElementType TA;
320 typedef typename VY::Impl::ElementType TY;
321 mvSwitch(NoTrans, TA(1), Ax.left(), Ax.right(), TY(0), y.impl());
322
323 FLENS_BLASLOG_END;
324 }
325
326 //------------------------------------------------------------------------------
327 //
328 // y = x*A
329 //
330 template <typename VL, typename MR, typename VY>
331 typename RestrictTo<ClosureType<OpMult, VL, MR>::isVectorMatrixProduct,
332 void>::Type
333 copy(const VectorClosure<OpMult, VL, MR> &xA, Vector<VY> &y)
334 {
335 FLENS_BLASLOG_BEGIN_COPY(xA, y);
336
337 //
338 // Call mv switch
339 //
340 typedef typename MR::Impl::ElementType TA;
341 typedef typename VY::Impl::ElementType TY;
342 mvSwitch(Trans, TA(1), xA.right(), xA.left(), TY(0), y.impl());
343
344 FLENS_BLASLOG_END;
345 }
346
347 //------------------------------------------------------------------------------
348 //
349 // y = b - A*x
350 //
351 template <typename L, typename R, typename VY>
352 typename RestrictTo<ClosureType<OpSub, L, R>::isResidual,
353 void>::Type
354 copy(const VectorClosure<OpSub, L, R> &bmAx, Vector<VY> &y)
355 {
356 FLENS_BLASLOG_BEGIN_COPY(bmAx, y);
357
358 const auto &b = bmAx.left();
359 const auto &A = bmAx.right().left();
360 const auto &x = bmAx.right().right();
361
362 residual(b, A, x, y.impl());
363
364 FLENS_BLASLOG_END;
365 }
366
367 //------------------------------------------------------------------------------
368 //
369 // y = <Unknown Closure>
370 //
371 #ifndef FLENS_DEBUG_CLOSURES
372
373 template <typename Op, typename VL, typename VR, typename VY>
374 void
375 copy(const VectorClosure<Op, VL, VR> &, Vector<VY> &)
376 {
377 ASSERT(0);
378 }
379
380 #else
381
382 template <typename Op, typename VL, typename VR, typename VY>
383 void
384 copy(const VectorClosure<Op, VL, VR> &x, Vector<VY> &y)
385 {
386 FLENS_BLASLOG_ERROR_COPY(x, y);
387 ASSERT(0);
388 }
389
390 #endif
391
392 //== matrix closures ===========================================================
393 //
394 // In the following comments op(X) denotes X, X^T or X^H
395 //
396
397 //
398 // Auxiliary function for
399 // B = op(A1 + A2) (alpha= 1)
400 // or B = op(A1 - A2) (alpha=-1)
401 //
402 template <typename MA1, typename ALPHA, typename MA2, typename MB>
403 void
404 copySum(Transpose trans,
405 const MA1 &A1, const ALPHA &alpha, const MA2 &A2, MB &B)
406 {
407 ASSERT(alpha==ALPHA(1) || alpha==ALPHA(-1));
408 //
409 // In debug-closure-mode we check if A2 has to stored in a temporary.
410 // Otherwise an assertion gets triggered if A2 and B are identical of if A2
411 // is a closure that contains B.
412 //
413 # ifdef FLENS_DEBUG_CLOSURES
414 bool tmpNeeded = DebugClosure::search(A2, B);
415 typename Result<MA2>::NoView tmp;
416
417 if (tmpNeeded) {
418 tmp = A2;
419 FLENS_BLASLOG_TMP_ADD(tmp);
420 }
421 # else
422 ASSERT(!DebugClosure::search(A2, B));
423 # endif
424
425 //
426 // B = A1
427 //
428 blas::copy(trans, A1, B);
429 //
430 // B += A2 or B -= A2
431 //
432 # ifdef FLENS_DEBUG_CLOSURES
433 if (tmpNeeded) {
434 blas::axpy(trans, alpha, tmp, B);
435 FLENS_BLASLOG_TMP_REMOVE(tmp, A2);
436 } else {
437 blas::axpy(trans, alpha, A2, B);
438 }
439 # else
440 blas::axpy(trans, alpha, A2, B);
441 # endif
442
443 }
444
445 //------------------------------------------------------------------------------
446 //
447 // B = op(A1 + A2)
448 //
449 template <typename ML, typename MR, typename MB>
450 typename RestrictTo<!ClosureType<OpAdd, ML, MR>::isMatrixMatrixProduct,
451 void>::Type
452 copy(Transpose trans, const MatrixClosure<OpAdd, ML, MR> &A, Matrix<MB> &B)
453 {
454 FLENS_BLASLOG_BEGIN_MCOPY(trans, A, B);
455
456 typename MB::Impl::ElementType One(1);
457 copySum(trans, A.left(), One, A.right(), B.impl());
458
459 FLENS_BLASLOG_END;
460 }
461
462 //------------------------------------------------------------------------------
463 //
464 // B = op(A1 - A2)
465 //
466 template <typename ML, typename MR, typename MB>
467 void
468 copy(Transpose trans, const MatrixClosure<OpSub, ML, MR> &A, Matrix<MB> &B)
469 {
470 FLENS_BLASLOG_BEGIN_MCOPY(trans, A, B);
471
472 typename MB::Impl::ElementType MinusOne(-1);
473 copySum(trans, A.left(), MinusOne, A.right(), B.impl());
474
475 FLENS_BLASLOG_END;
476 }
477
478 //------------------------------------------------------------------------------
479 //
480 // B = alpha*op(A)
481 //
482 // We evalute this with a scalSwitch
483 // case 1: A is no closure
484 // case 2: A is a closure
485
486 //
487 // case 1: A is no closure
488 //
489 template <typename ALPHA, typename MA, typename MB>
490 void
491 scalSwitch(Transpose trans,
492 const ALPHA &alpha, const Matrix<MA> &A, Matrix<MB> &B)
493 {
494 using namespace DEBUGCLOSURE;
495
496 if (identical(A.impl(), B.impl())) {
497 scal(alpha, B.impl());
498 if (trans!=NoTrans) {
499 cotr(trans, B.impl());
500 }
501 } else {
502 //
503 // Zero should have the same type as elements of y so that CBLAS
504 // gets called if possible.
505 //
506 typename Matrix<MA>::Impl::ElementType Zero(0);
507 scal(Zero, B.impl());
508
509 axpy(trans, alpha, A.impl(), B.impl());
510 }
511 }
512
513 //
514 // case 2: A is a closure
515 //
516 template <typename ALPHA, typename Op, typename L, typename R, typename MB>
517 void
518 scalSwitch(Transpose trans,
519 const ALPHA &alpha, const MatrixClosure<Op, L, R> &A, Matrix<MB> &B)
520 {
521 copy(trans, A, B.impl());
522 scal(alpha, B.impl());
523 }
524
525 template <typename T, typename MA, typename MB>
526 void
527 copy(Transpose trans,
528 const MatrixClosure<OpMult, ScalarValue<T>, MA> &A, Matrix<MB> &B)
529 {
530 FLENS_BLASLOG_BEGIN_COPY(A, B);
531 scalSwitch(trans, A.left().value(), A.right(), B.impl());
532 FLENS_BLASLOG_END;
533 }
534
535 //------------------------------------------------------------------------------
536 //
537 // B = op(A)/alpha
538 //
539
540 // We evalute this with a scalSwitch
541 // case 1: A is no closure
542 // case 2: A is a closure
543
544 //
545 // case 1: A is no closure
546 //
547 template <typename ALPHA, typename MA, typename MB>
548 void
549 rscalSwitch(Transpose trans,
550 const ALPHA &alpha, const Matrix<MA> &A, Matrix<MB> &B)
551 {
552 using namespace DEBUGCLOSURE;
553
554 if (identical(A.impl(), B.impl())) {
555 rscal(alpha, B.impl());
556 if (trans!=NoTrans) {
557 cotr(trans, B.impl());
558 }
559 } else {
560 //
561 // Zero should have the same type as elements of y so that CBLAS
562 // gets called if possible.
563 //
564 typename Matrix<MA>::Impl::ElementType Zero(0);
565 scal(Zero, B.impl());
566
567 raxpy(trans, alpha, A.impl(), B.impl());
568 }
569 }
570
571 //
572 // case 2: A is a closure
573 //
574 template <typename ALPHA, typename Op, typename L, typename R, typename MB>
575 void
576 rscalSwitch(Transpose trans,
577 const ALPHA &alpha, const MatrixClosure<Op, L, R> &A, Matrix<MB> &B)
578 {
579 copy(trans, A, B.impl());
580 rscal(alpha, B.impl());
581 }
582
583 template <typename MA, typename T, typename MB>
584 void
585 copy(Transpose trans,
586 const MatrixClosure<OpDiv, MA, ScalarValue<T> > &A, Matrix<MB> &B)
587 {
588 FLENS_BLASLOG_BEGIN_COPY(A, B);
589 rscalSwitch(trans, A.right().value(), A.left(), B.impl());
590 FLENS_BLASLOG_END;
591 }
592
593 //------------------------------------------------------------------------------
594 //
595 // B = op(A^T)
596 //
597 template <typename MA, typename MB>
598 void
599 copy(Transpose trans, const MatrixClosureOpTrans<MA> &A, Matrix<MB> &B)
600 {
601 using namespace DEBUGCLOSURE;
602
603 FLENS_BLASLOG_BEGIN_MCOPY(trans, A, B);
604
605 Transpose _trans = Transpose(trans^Trans);
606
607 copy(_trans, A.left(), B.impl());
608
609 FLENS_BLASLOG_END;
610 }
611
612 //------------------------------------------------------------------------------
613 //
614 // C = beta*C + A*B
615 //
616 // We evalute this with a mvSwitch
617 // case 1: A is no closure
618 // case 2: A is a scaling closure (i.e. scale*A)
619 // case 3: A is some other closure
620 // in all cases B is no closure.
621
622
623
624
625 //------------------------------------------------------------------------------
626 //
627 // C = beta*C + A*B
628 //
629 template <typename ML, typename MR, typename MC>
630 typename RestrictTo<ClosureType<OpAdd, ML, MR>::isMatrixMatrixProduct,
631 void>::Type
632 copy(Transpose trans, const MatrixClosure<OpAdd, ML, MR> &CPAB, Matrix<MC> &C)
633 {
634 FLENS_BLASLOG_BEGIN_COPY(CPAB, C);
635
636 using namespace DEBUGCLOSURE;
637 //
638 // check if C form rhs and lhs are identical
639 //
640 typedef typename PruneScaling<ML>::Remainder RML;
641
642 const RML &_C = PruneScaling<ML>::getRemainder(CPAB.left());
643
644 if (trans!=NoTrans || !identical(_C, C.impl())) {
645 typedef typename MC::Impl::ElementType TC;
646 const TC One(1);
647 copySum(trans, CPAB.left(), One, CPAB.right(), C.impl());
648 FLENS_BLASLOG_END;
649 return;
650 }
651 //
652 // get factor beta
653 //
654 typedef typename PruneScaling<ML>::ScalingType SML;
655 const SML &beta = PruneScaling<ML>::getFactor(CPAB.left());
656 //
657 // Rest gets done by the mv switch
658 //
659 typedef typename MR::Left::ElementType TA;
660 const auto &A = CPAB.right().left();
661 const auto &B = CPAB.right().right();
662
663 if (trans==NoTrans || trans==Conj) {
664 mmSwitch(trans, trans, TA(1), A, B, beta, C.impl());
665 } else {
666 mmSwitch(trans, trans, TA(1), B, A, beta, C.impl());
667 }
668
669 FLENS_BLASLOG_END;
670 }
671
672 //------------------------------------------------------------------------------
673 //
674 // C = A*B
675 //
676 template <typename MA, typename MB, typename MC>
677 typename RestrictTo<ClosureType<OpMult, MA, MB>::isMatrixMatrixProduct,
678 void>::Type
679 copy(Transpose trans, const MatrixClosure<OpMult, MA, MB> &AB, Matrix<MC> &C)
680 {
681 FLENS_BLASLOG_BEGIN_MCOPY(trans, AB, C);
682
683 //
684 // Call mv switch
685 //
686 typedef typename MA::Impl::ElementType TA;
687 typedef typename MC::Impl::ElementType TC;
688
689 const auto &A = AB.left();
690 const auto &B = AB.right();
691
692 if (trans==NoTrans || trans==Conj) {
693 mmSwitch(trans, trans, TA(1), A, B, TC(0), C.impl());
694 } else {
695 mmSwitch(trans, trans, TA(1), B, A, TC(0), C.impl());
696 }
697
698 FLENS_BLASLOG_END;
699 }
700
701
702 //------------------------------------------------------------------------------
703 //
704 // B = <Unknown Closure>
705 //
706 #ifndef FLENS_DEBUG_CLOSURES
707
708 template <typename Op, typename ML, typename MR, typename MB>
709 void
710 copy(Transpose, const MatrixClosure<Op, ML, MR> &, Matrix<MB> &)
711 {
712 ERROR_MSG("B = <Unknown Closure>");
713 ASSERT(0);
714 }
715
716 #else
717
718 template <typename Op, typename ML, typename MR, typename MB>
719 void
720 copy(Transpose trans, const MatrixClosure<Op, ML, MR> &A, Matrix<MB> &B)
721 {
722 FLENS_BLASLOG_ERROR_MCOPY(trans, A, B);
723 ERROR_MSG("B = <Unknown Closure>");
724 ASSERT(0);
725 }
726
727 #endif
728
729 //-- symmetric matrices --------------------------------------------------------
730 //
731 // We just trans is NoTrans or Trans we simply ignore it
732 //
733 template <typename MA, typename MB>
734 void
735 copy(Transpose trans, const SymmetricMatrix<MA> &A, Matrix<MB> &B)
736 {
737 # ifndef FLENS_DEBUG_CLOSURES
738 ASSERT(trans==NoTrans || trans==Trans);
739 # else
740 if (trans!=NoTrans && trans!=Trans) {
741 typedef typename MA::ElementType TA;
742
743 GeMatrix<FullStorage<TA> > _A = A;
744 FLENS_BLASLOG_TMP_ADD(_A);
745
746 copy(trans, _A, B.impl());
747
748 FLENS_BLASLOG_TMP_REMOVE(_A, A);
749 return;
750 }
751 # endif
752 copy(A.impl(), B.impl());
753 }
754
755 template <typename MA, typename MB>
756 typename RestrictTo<!IsSymmetricMatrix<MA>::value,
757 void>::Type
758 copy(Transpose trans, const Matrix<MA> &A, SymmetricMatrix<MB> &B)
759 {
760 copy(trans, A.impl(), B.impl());
761 }
762
763 } } // namespace blas, flens
764
765 #endif // FLENS_BLAS_CLOSURES_COPY_TCC
2 * Copyright (c) 2007, Michael Lehn
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1) Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2) Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
15 * distribution.
16 * 3) Neither the name of the FLENS development group nor the names of
17 * its contributors may be used to endorse or promote products derived
18 * from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 #ifndef FLENS_BLAS_CLOSURES_COPY_TCC
34 #define FLENS_BLAS_CLOSURES_COPY_TCC 1
35
36 #include <flens/aux/aux.h>
37 #include <flens/blas/closures/debugclosure.h>
38 #include <flens/blas/closures/mmswitch.h>
39 #include <flens/blas/closures/mvswitch.h>
40 #include <flens/blas/closures/result.h>
41 #include <flens/blas/closures/residual.h>
42 #include <flens/blas/level1/level1.h>
43 #include <flens/blas/level2/level2.h>
44 #include <flens/typedefs.h>
45
46 #ifdef FLENS_DEBUG_CLOSURES
47 # include <flens/blas/blaslogon.h>
48 #else
49 # include <flens/blas/blaslogoff.h>
50 #endif
51
52 namespace flens { namespace blas {
53
54 //
55 //== vector closures ===========================================================
56 //
57
58 //
59 // Auxiliary function for
60 // y = x1 + x2 (alpha= 1)
61 // or y = x1 - x2 (alpha=-1)
62 //
63 template <typename VX1, typename ALPHA, typename VX2, typename VY>
64 void
65 copySum(const VX1 &x1, const ALPHA &alpha, const VX2 &x2, VY &y)
66 {
67 ASSERT(alpha==ALPHA(1) || alpha==ALPHA(-1));
68 //
69 // In debug-closure-mode we check if x2 has to stored in a temporary.
70 // Otherwise an assertion gets triggered if x2 and y are identical of if x2
71 // is a closure that contains y.
72 //
73 # ifdef FLENS_DEBUG_CLOSURES
74 bool tmpNeeded = DebugClosure::search(x2, y);
75 typename Result<VX2>::NoView tmp;
76
77 if (tmpNeeded) {
78 tmp = x2;
79 FLENS_BLASLOG_TMP_ADD(tmp);
80 }
81 # else
82 ASSERT(!DebugClosure::search(x2, y));
83 # endif
84
85 //
86 // y = x1
87 //
88 blas::copy(x1, y);
89 //
90 // y += x2 or y -= x2
91 //
92 # ifdef FLENS_DEBUG_CLOSURES
93 if (tmpNeeded) {
94 blas::axpy(alpha, tmp, y);
95 FLENS_BLASLOG_TMP_REMOVE(tmp, x2);
96 } else {
97 blas::axpy(alpha, x2, y);
98 }
99 # else
100 blas::axpy(alpha, x2, y);
101 # endif
102
103 }
104
105 //------------------------------------------------------------------------------
106 //
107 // y = x1 + x2
108 //
109 template <typename VL, typename VR, typename VY>
110 typename RestrictTo<!ClosureType<OpAdd, VL, VR>::isMatrixVectorProduct,
111 void>::Type
112 copy(const VectorClosure<OpAdd, VL, VR> &x, Vector<VY> &y)
113 {
114 FLENS_BLASLOG_BEGIN_COPY(x, y);
115
116 typedef typename VY::Impl::ElementType T;
117 const T One(1);
118 copySum(x.left(), One, x.right(), y.impl());
119
120 FLENS_BLASLOG_END;
121 }
122
123 //------------------------------------------------------------------------------
124 //
125 // y = x1 - x2
126 //
127 template <typename VL, typename VR, typename VY>
128 typename RestrictTo<!ClosureType<OpSub, VL, VR>::isResidual,
129 void>::Type
130 copy(const VectorClosure<OpSub, VL, VR> &x, Vector<VY> &y)
131 {
132 FLENS_BLASLOG_BEGIN_COPY(x, y);
133
134 typedef typename VY::Impl::ElementType T;
135 const T MinusOne(-1);
136 copySum(x.left(), MinusOne, x.right(), y.impl());
137
138 FLENS_BLASLOG_END;
139 }
140
141 //------------------------------------------------------------------------------
142 //
143 // y = alpha*x
144 //
145 // We evalute this with a scalSwitch
146 // case 1: x is no closure
147 // case 2: x is a closure
148
149 //
150 // case 1: x is no closure
151 //
152 template <typename ALPHA, typename VX, typename VY>
153 void
154 scalSwitch(const ALPHA &alpha, const Vector<VX> &x, Vector<VY> &y)
155 {
156 using namespace DEBUGCLOSURE;
157
158 if (identical(x.impl(), y.impl())) {
159 scal(alpha, y.impl());
160 } else {
161 //
162 // Zero should have the same type as elements of y so that CBLAS
163 // gets called if possible.
164 //
165 typename Vector<VX>::Impl::ElementType Zero(0);
166 scal(Zero, y.impl());
167
168 axpy(alpha, x.impl(), y.impl());
169 }
170 }
171
172 //
173 // case 2: x is a closure
174 //
175 template <typename ALPHA, typename Op, typename L, typename R, typename VY>
176 void
177 scalSwitch(const ALPHA &alpha, const VectorClosure<Op, L, R> &x, Vector<VY> &y)
178 {
179 copy(x, y.impl());
180 scal(alpha, y.impl());
181 }
182
183 //
184 // entry point for switch
185 //
186 template <typename T, typename VX, typename VY>
187 void
188 copy(const VectorClosure<OpMult, ScalarValue<T>, VX> &x, Vector<VY> &y)
189 {
190 FLENS_BLASLOG_BEGIN_COPY(x, y);
191 scalSwitch(x.left().value(), x.right(), y.impl());
192 FLENS_BLASLOG_END;
193 }
194
195
196 //------------------------------------------------------------------------------
197 //
198 // y = x/alpha
199 //
200 // We evalute this with a rscalSwitch
201 // case 1: x is no closure
202 // case 2: x is a closure
203
204 //
205 // case 1: x is no closure
206 //
207 template <typename ALPHA, typename VX, typename VY>
208 void
209 rscalSwitch(const ALPHA &alpha, const Vector<VX> &x, Vector<VY> &y)
210 {
211 using namespace DEBUGCLOSURE;
212
213 if (identical(x.impl(), y.impl())) {
214 rscal(alpha, y.impl());
215 } else {
216 //
217 // Zero should have the same type as elements of y so that CBLAS
218 // gets called if possible.
219 //
220 typename Vector<VX>::Impl::ElementType Zero(0);
221 scal(Zero, y.impl());
222
223 raxpy(alpha, x.impl(), y.impl());
224 }
225 }
226
227 //
228 // case 2: x is a closure
229 //
230 template <typename ALPHA, typename Op, typename L, typename R, typename VY>
231 void
232 rscalSwitch(const ALPHA &alpha, const VectorClosure<Op, L, R> &x, Vector<VY> &y)
233 {
234 copy(x, y.impl());
235 rscal(alpha, y.impl());
236 }
237
238 //
239 // entry point for switch
240 //
241 template <typename VX, typename T, typename VY>
242 void
243 copy(const VectorClosure<OpDiv, VX, ScalarValue<T> > &x, Vector<VY> &y)
244 {
245 FLENS_BLASLOG_BEGIN_COPY(x, y);
246 rscalSwitch(x.right().value(), x.left(), y.impl());
247 FLENS_BLASLOG_END;
248 }
249
250 //------------------------------------------------------------------------------
251 //
252 // y = conjugate(x)
253 //
254 template <typename VX, typename VY>
255 void
256 copy(const VectorClosureOpConj<VX> &, Vector<VY> &)
257 {
258 ERROR_MSG("Lehn: Will be implemented on demand");
259 ASSERT(0);
260 }
261
262 //------------------------------------------------------------------------------
263 //
264 // y = beta*y + alpha*op(A)*x
265 //
266 template <typename VL, typename MVR, typename VY>
267 typename RestrictTo<ClosureType<OpAdd, VL, MVR>::isMatrixVectorProduct,
268 void>::Type
269 copy(const VectorClosure<OpAdd, VL, MVR> &ypAx, Vector<VY> &y)
270 {
271 FLENS_BLASLOG_BEGIN_COPY(ypAx, y);
272
273 using namespace DEBUGCLOSURE;
274 //
275 // check if y form rhs and lhs are identical
276 //
277 typedef typename PruneScaling<VL>::Remainder RVL;
278
279 const RVL &_y = PruneScaling<VL>::getRemainder(ypAx.left());
280
281 if (!identical(_y, y.impl())) {
282 typedef typename VY::Impl::ElementType TY;
283 const TY One(1);
284 copySum(ypAx.left(), One, ypAx.right(), y.impl());
285 FLENS_BLASLOG_END;
286 return;
287 }
288 //
289 // get factor beta
290 //
291 typedef typename PruneScaling<VL>::ScalingType SVL;
292 const SVL &beta = PruneScaling<VL>::getFactor(ypAx.left());
293 //
294 // Rest gets done by the mv switch
295 //
296 typedef typename MVR::Left::ElementType TA;
297 const auto &A = ypAx.right().left();
298 const auto &x = ypAx.right().right();
299
300 mvSwitch(NoTrans, TA(1), A, x, beta, y.impl());
301
302 FLENS_BLASLOG_END;
303 }
304
305 //------------------------------------------------------------------------------
306 //
307 // y = A*x
308 //
309 template <typename ML, typename VR, typename VY>
310 typename RestrictTo<ClosureType<OpMult, ML, VR>::isMatrixVectorProduct,
311 void>::Type
312 copy(const VectorClosure<OpMult, ML, VR> &Ax, Vector<VY> &y)
313 {
314 FLENS_BLASLOG_BEGIN_COPY(Ax, y);
315
316 //
317 // Call mv switch
318 //
319 typedef typename ML::Impl::ElementType TA;
320 typedef typename VY::Impl::ElementType TY;
321 mvSwitch(NoTrans, TA(1), Ax.left(), Ax.right(), TY(0), y.impl());
322
323 FLENS_BLASLOG_END;
324 }
325
326 //------------------------------------------------------------------------------
327 //
328 // y = x*A
329 //
330 template <typename VL, typename MR, typename VY>
331 typename RestrictTo<ClosureType<OpMult, VL, MR>::isVectorMatrixProduct,
332 void>::Type
333 copy(const VectorClosure<OpMult, VL, MR> &xA, Vector<VY> &y)
334 {
335 FLENS_BLASLOG_BEGIN_COPY(xA, y);
336
337 //
338 // Call mv switch
339 //
340 typedef typename MR::Impl::ElementType TA;
341 typedef typename VY::Impl::ElementType TY;
342 mvSwitch(Trans, TA(1), xA.right(), xA.left(), TY(0), y.impl());
343
344 FLENS_BLASLOG_END;
345 }
346
347 //------------------------------------------------------------------------------
348 //
349 // y = b - A*x
350 //
351 template <typename L, typename R, typename VY>
352 typename RestrictTo<ClosureType<OpSub, L, R>::isResidual,
353 void>::Type
354 copy(const VectorClosure<OpSub, L, R> &bmAx, Vector<VY> &y)
355 {
356 FLENS_BLASLOG_BEGIN_COPY(bmAx, y);
357
358 const auto &b = bmAx.left();
359 const auto &A = bmAx.right().left();
360 const auto &x = bmAx.right().right();
361
362 residual(b, A, x, y.impl());
363
364 FLENS_BLASLOG_END;
365 }
366
367 //------------------------------------------------------------------------------
368 //
369 // y = <Unknown Closure>
370 //
371 #ifndef FLENS_DEBUG_CLOSURES
372
373 template <typename Op, typename VL, typename VR, typename VY>
374 void
375 copy(const VectorClosure<Op, VL, VR> &, Vector<VY> &)
376 {
377 ASSERT(0);
378 }
379
380 #else
381
382 template <typename Op, typename VL, typename VR, typename VY>
383 void
384 copy(const VectorClosure<Op, VL, VR> &x, Vector<VY> &y)
385 {
386 FLENS_BLASLOG_ERROR_COPY(x, y);
387 ASSERT(0);
388 }
389
390 #endif
391
392 //== matrix closures ===========================================================
393 //
394 // In the following comments op(X) denotes X, X^T or X^H
395 //
396
397 //
398 // Auxiliary function for
399 // B = op(A1 + A2) (alpha= 1)
400 // or B = op(A1 - A2) (alpha=-1)
401 //
402 template <typename MA1, typename ALPHA, typename MA2, typename MB>
403 void
404 copySum(Transpose trans,
405 const MA1 &A1, const ALPHA &alpha, const MA2 &A2, MB &B)
406 {
407 ASSERT(alpha==ALPHA(1) || alpha==ALPHA(-1));
408 //
409 // In debug-closure-mode we check if A2 has to stored in a temporary.
410 // Otherwise an assertion gets triggered if A2 and B are identical of if A2
411 // is a closure that contains B.
412 //
413 # ifdef FLENS_DEBUG_CLOSURES
414 bool tmpNeeded = DebugClosure::search(A2, B);
415 typename Result<MA2>::NoView tmp;
416
417 if (tmpNeeded) {
418 tmp = A2;
419 FLENS_BLASLOG_TMP_ADD(tmp);
420 }
421 # else
422 ASSERT(!DebugClosure::search(A2, B));
423 # endif
424
425 //
426 // B = A1
427 //
428 blas::copy(trans, A1, B);
429 //
430 // B += A2 or B -= A2
431 //
432 # ifdef FLENS_DEBUG_CLOSURES
433 if (tmpNeeded) {
434 blas::axpy(trans, alpha, tmp, B);
435 FLENS_BLASLOG_TMP_REMOVE(tmp, A2);
436 } else {
437 blas::axpy(trans, alpha, A2, B);
438 }
439 # else
440 blas::axpy(trans, alpha, A2, B);
441 # endif
442
443 }
444
445 //------------------------------------------------------------------------------
446 //
447 // B = op(A1 + A2)
448 //
449 template <typename ML, typename MR, typename MB>
450 typename RestrictTo<!ClosureType<OpAdd, ML, MR>::isMatrixMatrixProduct,
451 void>::Type
452 copy(Transpose trans, const MatrixClosure<OpAdd, ML, MR> &A, Matrix<MB> &B)
453 {
454 FLENS_BLASLOG_BEGIN_MCOPY(trans, A, B);
455
456 typename MB::Impl::ElementType One(1);
457 copySum(trans, A.left(), One, A.right(), B.impl());
458
459 FLENS_BLASLOG_END;
460 }
461
462 //------------------------------------------------------------------------------
463 //
464 // B = op(A1 - A2)
465 //
466 template <typename ML, typename MR, typename MB>
467 void
468 copy(Transpose trans, const MatrixClosure<OpSub, ML, MR> &A, Matrix<MB> &B)
469 {
470 FLENS_BLASLOG_BEGIN_MCOPY(trans, A, B);
471
472 typename MB::Impl::ElementType MinusOne(-1);
473 copySum(trans, A.left(), MinusOne, A.right(), B.impl());
474
475 FLENS_BLASLOG_END;
476 }
477
478 //------------------------------------------------------------------------------
479 //
480 // B = alpha*op(A)
481 //
482 // We evalute this with a scalSwitch
483 // case 1: A is no closure
484 // case 2: A is a closure
485
486 //
487 // case 1: A is no closure
488 //
489 template <typename ALPHA, typename MA, typename MB>
490 void
491 scalSwitch(Transpose trans,
492 const ALPHA &alpha, const Matrix<MA> &A, Matrix<MB> &B)
493 {
494 using namespace DEBUGCLOSURE;
495
496 if (identical(A.impl(), B.impl())) {
497 scal(alpha, B.impl());
498 if (trans!=NoTrans) {
499 cotr(trans, B.impl());
500 }
501 } else {
502 //
503 // Zero should have the same type as elements of y so that CBLAS
504 // gets called if possible.
505 //
506 typename Matrix<MA>::Impl::ElementType Zero(0);
507 scal(Zero, B.impl());
508
509 axpy(trans, alpha, A.impl(), B.impl());
510 }
511 }
512
513 //
514 // case 2: A is a closure
515 //
516 template <typename ALPHA, typename Op, typename L, typename R, typename MB>
517 void
518 scalSwitch(Transpose trans,
519 const ALPHA &alpha, const MatrixClosure<Op, L, R> &A, Matrix<MB> &B)
520 {
521 copy(trans, A, B.impl());
522 scal(alpha, B.impl());
523 }
524
525 template <typename T, typename MA, typename MB>
526 void
527 copy(Transpose trans,
528 const MatrixClosure<OpMult, ScalarValue<T>, MA> &A, Matrix<MB> &B)
529 {
530 FLENS_BLASLOG_BEGIN_COPY(A, B);
531 scalSwitch(trans, A.left().value(), A.right(), B.impl());
532 FLENS_BLASLOG_END;
533 }
534
535 //------------------------------------------------------------------------------
536 //
537 // B = op(A)/alpha
538 //
539
540 // We evalute this with a scalSwitch
541 // case 1: A is no closure
542 // case 2: A is a closure
543
544 //
545 // case 1: A is no closure
546 //
547 template <typename ALPHA, typename MA, typename MB>
548 void
549 rscalSwitch(Transpose trans,
550 const ALPHA &alpha, const Matrix<MA> &A, Matrix<MB> &B)
551 {
552 using namespace DEBUGCLOSURE;
553
554 if (identical(A.impl(), B.impl())) {
555 rscal(alpha, B.impl());
556 if (trans!=NoTrans) {
557 cotr(trans, B.impl());
558 }
559 } else {
560 //
561 // Zero should have the same type as elements of y so that CBLAS
562 // gets called if possible.
563 //
564 typename Matrix<MA>::Impl::ElementType Zero(0);
565 scal(Zero, B.impl());
566
567 raxpy(trans, alpha, A.impl(), B.impl());
568 }
569 }
570
571 //
572 // case 2: A is a closure
573 //
574 template <typename ALPHA, typename Op, typename L, typename R, typename MB>
575 void
576 rscalSwitch(Transpose trans,
577 const ALPHA &alpha, const MatrixClosure<Op, L, R> &A, Matrix<MB> &B)
578 {
579 copy(trans, A, B.impl());
580 rscal(alpha, B.impl());
581 }
582
583 template <typename MA, typename T, typename MB>
584 void
585 copy(Transpose trans,
586 const MatrixClosure<OpDiv, MA, ScalarValue<T> > &A, Matrix<MB> &B)
587 {
588 FLENS_BLASLOG_BEGIN_COPY(A, B);
589 rscalSwitch(trans, A.right().value(), A.left(), B.impl());
590 FLENS_BLASLOG_END;
591 }
592
593 //------------------------------------------------------------------------------
594 //
595 // B = op(A^T)
596 //
597 template <typename MA, typename MB>
598 void
599 copy(Transpose trans, const MatrixClosureOpTrans<MA> &A, Matrix<MB> &B)
600 {
601 using namespace DEBUGCLOSURE;
602
603 FLENS_BLASLOG_BEGIN_MCOPY(trans, A, B);
604
605 Transpose _trans = Transpose(trans^Trans);
606
607 copy(_trans, A.left(), B.impl());
608
609 FLENS_BLASLOG_END;
610 }
611
612 //------------------------------------------------------------------------------
613 //
614 // C = beta*C + A*B
615 //
616 // We evalute this with a mvSwitch
617 // case 1: A is no closure
618 // case 2: A is a scaling closure (i.e. scale*A)
619 // case 3: A is some other closure
620 // in all cases B is no closure.
621
622
623
624
625 //------------------------------------------------------------------------------
626 //
627 // C = beta*C + A*B
628 //
629 template <typename ML, typename MR, typename MC>
630 typename RestrictTo<ClosureType<OpAdd, ML, MR>::isMatrixMatrixProduct,
631 void>::Type
632 copy(Transpose trans, const MatrixClosure<OpAdd, ML, MR> &CPAB, Matrix<MC> &C)
633 {
634 FLENS_BLASLOG_BEGIN_COPY(CPAB, C);
635
636 using namespace DEBUGCLOSURE;
637 //
638 // check if C form rhs and lhs are identical
639 //
640 typedef typename PruneScaling<ML>::Remainder RML;
641
642 const RML &_C = PruneScaling<ML>::getRemainder(CPAB.left());
643
644 if (trans!=NoTrans || !identical(_C, C.impl())) {
645 typedef typename MC::Impl::ElementType TC;
646 const TC One(1);
647 copySum(trans, CPAB.left(), One, CPAB.right(), C.impl());
648 FLENS_BLASLOG_END;
649 return;
650 }
651 //
652 // get factor beta
653 //
654 typedef typename PruneScaling<ML>::ScalingType SML;
655 const SML &beta = PruneScaling<ML>::getFactor(CPAB.left());
656 //
657 // Rest gets done by the mv switch
658 //
659 typedef typename MR::Left::ElementType TA;
660 const auto &A = CPAB.right().left();
661 const auto &B = CPAB.right().right();
662
663 if (trans==NoTrans || trans==Conj) {
664 mmSwitch(trans, trans, TA(1), A, B, beta, C.impl());
665 } else {
666 mmSwitch(trans, trans, TA(1), B, A, beta, C.impl());
667 }
668
669 FLENS_BLASLOG_END;
670 }
671
672 //------------------------------------------------------------------------------
673 //
674 // C = A*B
675 //
676 template <typename MA, typename MB, typename MC>
677 typename RestrictTo<ClosureType<OpMult, MA, MB>::isMatrixMatrixProduct,
678 void>::Type
679 copy(Transpose trans, const MatrixClosure<OpMult, MA, MB> &AB, Matrix<MC> &C)
680 {
681 FLENS_BLASLOG_BEGIN_MCOPY(trans, AB, C);
682
683 //
684 // Call mv switch
685 //
686 typedef typename MA::Impl::ElementType TA;
687 typedef typename MC::Impl::ElementType TC;
688
689 const auto &A = AB.left();
690 const auto &B = AB.right();
691
692 if (trans==NoTrans || trans==Conj) {
693 mmSwitch(trans, trans, TA(1), A, B, TC(0), C.impl());
694 } else {
695 mmSwitch(trans, trans, TA(1), B, A, TC(0), C.impl());
696 }
697
698 FLENS_BLASLOG_END;
699 }
700
701
702 //------------------------------------------------------------------------------
703 //
704 // B = <Unknown Closure>
705 //
706 #ifndef FLENS_DEBUG_CLOSURES
707
708 template <typename Op, typename ML, typename MR, typename MB>
709 void
710 copy(Transpose, const MatrixClosure<Op, ML, MR> &, Matrix<MB> &)
711 {
712 ERROR_MSG("B = <Unknown Closure>");
713 ASSERT(0);
714 }
715
716 #else
717
718 template <typename Op, typename ML, typename MR, typename MB>
719 void
720 copy(Transpose trans, const MatrixClosure<Op, ML, MR> &A, Matrix<MB> &B)
721 {
722 FLENS_BLASLOG_ERROR_MCOPY(trans, A, B);
723 ERROR_MSG("B = <Unknown Closure>");
724 ASSERT(0);
725 }
726
727 #endif
728
729 //-- symmetric matrices --------------------------------------------------------
730 //
731 // We just trans is NoTrans or Trans we simply ignore it
732 //
733 template <typename MA, typename MB>
734 void
735 copy(Transpose trans, const SymmetricMatrix<MA> &A, Matrix<MB> &B)
736 {
737 # ifndef FLENS_DEBUG_CLOSURES
738 ASSERT(trans==NoTrans || trans==Trans);
739 # else
740 if (trans!=NoTrans && trans!=Trans) {
741 typedef typename MA::ElementType TA;
742
743 GeMatrix<FullStorage<TA> > _A = A;
744 FLENS_BLASLOG_TMP_ADD(_A);
745
746 copy(trans, _A, B.impl());
747
748 FLENS_BLASLOG_TMP_REMOVE(_A, A);
749 return;
750 }
751 # endif
752 copy(A.impl(), B.impl());
753 }
754
755 template <typename MA, typename MB>
756 typename RestrictTo<!IsSymmetricMatrix<MA>::value,
757 void>::Type
758 copy(Transpose trans, const Matrix<MA> &A, SymmetricMatrix<MB> &B)
759 {
760 copy(trans, A.impl(), B.impl());
761 }
762
763 } } // namespace blas, flens
764
765 #endif // FLENS_BLAS_CLOSURES_COPY_TCC