1 /*
  2  *   Copyright (c) 2012, Michael Lehn
  3  *
  4  *   All rights reserved.
  5  *
  6  *   Redistribution and use in source and binary forms, with or without
  7  *   modification, are permitted provided that the following conditions
  8  *   are met:
  9  *
 10  *   1) Redistributions of source code must retain the above copyright
 11  *      notice, this list of conditions and the following disclaimer.
 12  *   2) Redistributions in binary form must reproduce the above copyright
 13  *      notice, this list of conditions and the following disclaimer in
 14  *      the documentation and/or other materials provided with the
 15  *      distribution.
 16  *   3) Neither the name of the FLENS development group nor the names of
 17  *      its contributors may be used to endorse or promote products derived
 18  *      from this software without specific prior written permission.
 19  *
 20  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 21  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 22  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 23  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 24  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 25  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 26  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 30  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31  */
 32 
 33 #ifndef FLENS_BLAS_CLOSURES_MMSWITCH_TCC
 34 #define FLENS_BLAS_CLOSURES_MMSWITCH_TCC 1
 35 
 36 #include <flens/aux/aux.h>
 37 #include <flens/blas/closures/debugclosure.h>
 38 #include <flens/blas/closures/prune.h>
 39 #include <flens/blas/level1/level1.h>
 40 #include <flens/blas/level2/level2.h>
 41 #include <flens/typedefs.h>
 42 
 43 #ifdef FLENS_DEBUG_CLOSURES
 44 #   include <flens/blas/blaslogon.h>
 45 #else
 46 #   include <flens/blas/blaslogoff.h>
 47 #endif
 48 
 49 namespace flens { namespace blas {
 50 
 51 //
 52 //  This switch evaluates closures of the form
 53 //
 54 //      C = beta*C + A*B
 55 //
 56 //  If B is a closure then it gets evaluated and a temporary gets created to
 57 //  store the result.  For matrix A we distinguish between three cases:
 58 //  case 1: A is no closure
 59 //  case 2: A is a scaling closure (i.e. scale*A)
 60 //  case 3: A is some other closure
 61 
 62 //
 63 //  Entry point for mmSwitch
 64 //
 65 template <typename ALPHA, typename MA, typename MB, typename BETA, typename MC>
 66 typename RestrictTo<IsSame<MA, typename MA::Impl>::value &&
 67                     IsSame<MB, typename MB::Impl>::value &&
 68                     IsSame<MC, typename MC::Impl>::value,
 69          void>::Type
 70 mmSwitch(Transpose transA, Transpose transB, const ALPHA &alpha,
 71          const MA &A, const MB &B, const BETA &beta, MC &C)
 72 {
 73     ASSERT(alpha==ALPHA(1) || alpha==ALPHA(-1));
 74 //
 75 //  If A is a closure then prune arbitrary many OpTrans/OpConj
 76 //
 77     typedef typename PruneConjTrans<MA>::Remainder RMA;
 78 
 79     transA = Transpose(transA^PruneConjTrans<MA>::trans);
 80     const RMA  &_A = PruneConjTrans<MA>::remainder(A);
 81 //
 82 //  If B is a closure then prune arbitrary many OpTrans/OpConj
 83 //
 84     transB = Transpose(transB^PruneConjTrans<MB>::trans);
 85 //
 86 //  If the remainder B is a closure it gets evaluated.  In this case a temporary
 87 //  gets created.  Otherwise we only keep a reference
 88 //
 89     FLENS_BLASLOG_TMP_TRON;
 90     typedef typename PruneConjTrans<MB>::Remainder RMB;
 91     const typename Result<RMB>::Type &_B = PruneConjTrans<MB>::remainder(B);
 92     FLENS_BLASLOG_TMP_TROFF;
 93 //
 94 //  Call mm implementation
 95 //
 96     mmCase(transA, transB, alpha, _A, _B, beta, C);
 97 //
 98 //  If a temporary was created and registered before we now unregister it
 99 //
100 #   ifdef FLENS_DEBUG_CLOSURES
101     if (!IsSame<RMB, typename Result<RMB>::Type>::value) {
102         FLENS_BLASLOG_TMP_REMOVE(_B, PruneConjTrans<MB>::remainder(B));
103     }
104 #   else
105     static_assert(IsSame<RMB, typename Result<RMB>::Type>::value,
106                   "temporary required");
107 #   endif
108 }
109 
110 //
111 //  case 1: A is no closure
112 //
113 template <typename ALPHA, typename MA, typename MB, typename BETA, typename MC>
114 typename RestrictTo<!IsClosure<MA>::value,
115          void>::Type
116 mmCase(Transpose transA, Transpose transB, const ALPHA &alpha,
117        const MA &A, const MB &B, const BETA &beta, MC &C)
118 {
119     mm(transA, transB, alpha, A, B, beta, C);
120 }
121 
122 //
123 //  case 2: A is closure of type scale*A
124 //
125 template <typename ALPHA, typename T, typename MA, typename MB, typename BETA,
126           typename MC>
127 void
128 mmCase(Transpose transA, Transpose transB, const ALPHA &alpha,
129        const MatrixClosure<OpMult, ScalarValue<T>, MA> &scale_A,
130        const MB &B, const BETA &beta, MC &C)
131 {
132 //
133 //  If A is a closure then prune arbitrary many OpTrans/OpConj
134 //
135     typedef typename PruneConjTrans<MA>::Remainder  RMA;
136     typedef typename Result<RMA>::Type              ResultRMA;
137 
138     transA = Transpose(transA^PruneConjTrans<MA>::trans);
139 //
140 //  If the remaining A is a closure it gets evaluated.  In this case
141 //  a temporary gets created.  Otherwise we only keep a reference
142 //
143     FLENS_BLASLOG_TMP_TRON;
144     const RMA &_A      = PruneConjTrans<MA>::remainder(scale_A.right());
145     const ResultRMA &A = _A;
146     FLENS_BLASLOG_TMP_TROFF;
147 
148     mm(transA, transB, alpha*scale_A.left().value(), A, B, beta, C);
149 
150 //
151 //  If a temporary was created and registered before we now unregister it
152 //
153 #   ifdef FLENS_DEBUG_CLOSURES
154     if (!IsSame<RMA, ResultRMA>::value) {
155         FLENS_BLASLOG_TMP_REMOVE(A, _A);
156     }
157 #   else
158     static_assert(IsSame<RMA, typename Result<RMA>::Type>::value,
159                   "temporary required");
160 #   endif
161 }
162 
163 //
164 //  case 3: A is some other closure
165 //
166 template <typename ALPHA, typename Op, typename L, typename R, typename MB,
167           typename BETA, typename MC>
168 void
169 mmCase(Transpose transA, Transpose transB, const ALPHA &alpha,
170        const MatrixClosure<Op, L, R> &A, const MB &B, const BETA &beta, MC &C)
171 {
172     typedef MatrixClosure<Op, L, R>  ClosureType;
173 
174 //
175 //  Create (most certainly) temporary for the result of A
176 //
177     FLENS_BLASLOG_TMP_TRON;
178     typedef typename Result<ClosureType>::Type  MA;
179     const MA &_A = A;
180     FLENS_BLASLOG_TMP_TROFF;
181 
182     mm(transA, transB, alpha, _A, B, beta, C);
183 
184 //
185 //  If a temporary was created and registered before we now unregister it
186 //
187 #   ifdef FLENS_DEBUG_CLOSURES
188     if (!IsSame<ClosureType, MA>::value) {
189         FLENS_BLASLOG_TMP_REMOVE(_A, A);
190     }
191 #   else
192     static_assert(IsSame<ClosureType, MA>::value, "temporary required");
193 #   endif
194 }
195 
196 } } // namespace blas, flens
197 
198 #endif // FLENS_BLAS_CLOSURES_MMSWITCH_TCC