1 
  2 
  3 
  4 
  5 
  6 
  7 
  8 
  9 
 10 
 11 
 12 
 13 
 14 
 15 
 16 
 17 
 18 
 19 
 20 
 21 
 22 
 23 
 24 
 25 
 26 
 27 
 28 
 29 
 30 
 31 
 32 
 33 
 34 
 35 
 36 
 37 
 38 
 39 
 40 
 41 
 42 
 43 
 44 
 45 
 46 
 47 
 48 
 49 
 50 
 51 
 52 
 53 
 54 
 55 
 56 
 57 
 58 
 59 
 60 
 61 
 62 
 63 
 64 
 65 
 66 
 67 
 68 
 69 
 70 
 71 
 72 
 73 
 74 
 75 
 76 
 77 
 78 
 79 
 80 
 81 
 82 
 83 
 84 
 85 
 86 
 87 
 88 
 89 
 90 
 91 
 92 
 93 
 94 
 95 
 96 
 97 
 98 
 99 
100 
101 
102 
103 
104 
105 
106 
107 
108 
109 
110 
111 
112 
113 
114 
115 
116 
117 
118 
119 
120 
121 
122 
123 
124 
125 
126 
127 
128 
129 
130 
131 
132 
133 
134 
135 
136 
137 
138 
139 
140 
141 
142 
143 
144 
145 
146 
147 
148 
149 
150 
151 
152 
153 
154 
155 
156 
157 
158 
159 
160 
161 
162 
163 
164 
165 
166 
167 
168 
169 
170 
171 
172 
173 
174 
175 
176 
177 
178 
179 
180 
181 
182 
183 
184 
185 
186 
187 
188 
189 
190 
191 
192 
193 
194 
195 
196 
197 
198 
199 
200 
201 
202 
203 
204 
205 
206 
207 
208 
209 
210 
211 
212 
213 
214 
215 
216 
217 
218 
219 
220 
221 
222 
223 
224 
225 
226 
227 
228 
229 
230 
231 
232 
233 
234 
235 
236 
237 
238 
239 
240 
241 
242 
243 
244 
245 
246 
247 
248 
249 
250 
251 
252 
253 
254 
255 
256 
257 
258 
259 
260 
261 
262 
263 
264 
265 
266 
267 
268 
269 
270 
271 
272 
273 
274 
275 
276 
277 
278 
279 
280 
281 
282 
283 
284 
285 
286 
287 
288 
289 
290 
291 
292 
293 
294 
295 
296 
297 
298 
299 
300 
301 
302 
303 
304 
305 
306 
307 
308 
309 
310 
311 
312 
313 
314 
315 
316 
317 
318 
319 
320 
321 
322 
323 
324 
325 
326 
327 
328 
329 
330 
331 
332 
333 
334 
335 
336 
337 
338 
339 
340 
341 
342 
343 
344 
345 
346 
347 
348 
349 
350 
351 
352 
353 
354 
355 
356 
357 
358 
359 
360 
361 
362 
363 
364 
365 
366 
367 
368 
369 
370 
371 
372 
373 
374 
375 
376 
377 
378 
379 
380 
381 
382 
383 
384 
385 
386 
387 
388 
389 
390 
391 
392 
393 
394 
395 
396 
397 
398 
399 
400 
 
 | 
 
    /* 
 *   Copyright (c) 2012, Michael Lehn 
 * 
 *   All rights reserved. 
 * 
 *   Redistribution and use in source and binary forms, with or without 
 *   modification, are permitted provided that the following conditions 
 *   are met: 
 * 
 *   1) Redistributions of source code must retain the above copyright 
 *      notice, this list of conditions and the following disclaimer. 
 *   2) Redistributions in binary form must reproduce the above copyright 
 *      notice, this list of conditions and the following disclaimer in 
 *      the documentation and/or other materials provided with the 
 *      distribution. 
 *   3) Neither the name of the FLENS development group nor the names of 
 *      its contributors may be used to endorse or promote products derived 
 *      from this software without specific prior written permission. 
 * 
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */ 
 
#ifndef FLENS_BLAS_CLOSURES_LEVEL3_MM_TCC 
#define FLENS_BLAS_CLOSURES_LEVEL3_MM_TCC 1 
 
#include <flens/auxiliary/auxiliary.h> 
#include <flens/blas/closures/closures.h> 
#include <flens/blas/level1/level1.h> 
#include <flens/blas/level2/level2.h> 
#include <flens/blas/level3/level3.h> 
#include <flens/typedefs.h> 
 
#ifdef FLENS_DEBUG_CLOSURES 
#   include <flens/blas/blaslogon.h> 
#else 
#   include <flens/blas/blaslogoff.h> 
#endif 
 
namespace flens { namespace blas { 
 
//== GeneralMatrix - GeneralMatrix products ==================================== 
template <typename ALPHA, typename MA, typename MB, typename BETA, typename MC> 
void 
mm(Transpose, Transpose, const ALPHA &, 
   const GeneralMatrix<MA> &, const GeneralMatrix<MB> &, 
   const BETA &, Matrix<MC> &) 
{ 
    // You get here if you want to call a matrix-matrix product that was not 
    // defined.  Or its not correctly included. 
    // TODO: print the signature of the blas::mm() function that needs to be 
    //       implemented. 
    ASSERT(0); 
} 
 
//== TriangularMatrix - GeneralMatrix products ================================= 
template <typename ALPHA, typename MA, typename MB, typename BETA, typename MC> 
typename RestrictTo<IsTriangularMatrix<MA>::value && 
                    IsGeneralMatrix<MB>::value && 
                    IsGeneralMatrix<MC>::value, 
         void>::Type 
trmm(Side side, Transpose transA, Transpose DEBUG_VAR(transB), 
     const ALPHA &alpha, const MA &A_, const MB &B_, 
     const BETA &DEBUG_VAR(beta), MC &C) 
{ 
    using namespace DEBUGCLOSURE; 
 
    typedef typename Result<MA>::Type  RMA; 
    typedef typename Result<MB>::Type  RMB; 
 
// 
//  In non-closure debug mode we do not allow temporaries for A or B. 
// 
#   ifndef FLENS_DEBUG_CLOSURES 
    static_assert(IsSame<RMA, typename Result<RMA>::Type>::value, 
                  "temporary required"); 
    static_assert(IsSame<RMB, typename Result<RMB>::Type>::value, 
                  "temporary required"); 
#   else 
    typedef typename Result<MC>::Type  RMC; 
#   endif 
 
// 
//  If A_ or B_ is a closure temporaries get created 
// 
    FLENS_BLASLOG_TMP_TRON; 
    const RMA &A = A_; 
    const RMB &B = B_; 
    FLENS_BLASLOG_TMP_TROFF; 
 
// 
//  If beta!=0 or transB!=NoTrans or A and C share the same memopry we need 
//  temporaries 
// 
#   ifndef FLENS_DEBUG_CLOSURES 
    ASSERT(beta==BETA(0)); 
    ASSERT(transB==NoTrans); 
    ASSERT(!identical(A, C)); 
#   else 
 
    typedef typename Result<MC>::Type  RMC; 
 
    if (transB!=NoTrans) { 
// 
//      apply op(B) and recall trmm 
// 
        typename RMB::NoView B_; 
        FLENS_BLASLOG_TMP_ADD(B_); 
        copy(transB, B, B_); 
        trmm(side, transA, NoTrans, alpha, A, B_, beta, C); 
        FLENS_BLASLOG_TMP_REMOVE(B_, B); 
        if (!IsSame<RMA, typename Result<RMA>::Type>::value) { 
            FLENS_BLASLOG_TMP_REMOVE(A, A_); 
        } 
        if (!IsSame<RMB, typename Result<RMB>::Type>::value) { 
            FLENS_BLASLOG_TMP_REMOVE(B, B_); 
        } 
        return; 
    } 
    if (identical(A,C)) { 
        FLENS_BLASLOG_IDENTICAL(A, C); 
        typename RMC::NoView C_; 
        FLENS_BLASLOG_TMP_ADD(C_); 
 
        trmm(side, transA, NoTrans, alpha, A, B, beta, C_); 
        C = C_; 
 
        FLENS_BLASLOG_TMP_REMOVE(C_, C); 
        return; 
    } 
    typename RMC::NoView tmpC; 
    if (beta!=BETA(0)) { 
        FLENS_BLASLOG_TMP_ADD(tmpC); 
        tmpC = C; 
    } 
#   endif 
 
// 
//  trmm can only compute C = A*C or C = C*A.  So if B and C are not identical 
//  we need to copy 
// 
    if (!identical(C, B)) { 
        C = B; 
    } 
    mm(side, transA, alpha, A, C); 
 
#   ifdef FLENS_DEBUG_CLOSURES 
    if (beta!=BETA(0)) { 
        C += beta*tmpC; 
        FLENS_BLASLOG_TMP_REMOVE(tmpC, C); 
    } 
    if (!IsSame<RMA, typename Result<RMA>::Type>::value) { 
        FLENS_BLASLOG_TMP_REMOVE(A, A_); 
    } 
    if (!IsSame<RMB, typename Result<RMB>::Type>::value) { 
        FLENS_BLASLOG_TMP_REMOVE(B, B_); 
    } 
#   endif 
} 
 
template <typename ALPHA, typename MA, typename MB, typename BETA, typename MC> 
void 
mm(Transpose transA, Transpose transB, const ALPHA &alpha, 
   const TriangularMatrix<MA> &A, const GeneralMatrix<MB> &B, 
   const BETA &beta, Matrix<MC> &C) 
{ 
    trmm(Left, transA, transB, alpha, A.impl(), B.impl(), beta, C.impl()); 
} 
 
template <typename ALPHA, typename MA, typename MB, typename BETA, typename MC> 
void 
mm(Transpose transA, Transpose transB, const ALPHA &alpha, 
   const GeneralMatrix<MA> &A, const TriangularMatrix<MB> &B, 
   const BETA &beta, Matrix<MC> &C) 
{ 
    trmm(Right, transB, transA, alpha, B.impl(), A.impl(), beta, C.impl()); 
} 
 
//== SymmetricMatrix - GeneralMatrix products ================================== 
template <typename ALPHA, typename MA, typename MB, typename BETA, typename MC> 
void 
symm(Side side, Transpose DEBUG_VAR(transB), const ALPHA &alpha, 
     const SymmetricMatrix<MA> &A_, const GeneralMatrix<MB> &B_, 
     const BETA &beta, Matrix<MC> &C) 
{ 
    using namespace DEBUGCLOSURE; 
 
    typedef typename Result<typename MA::Impl>::Type  RMA; 
    typedef typename Result<typename MB::Impl>::Type  RMB; 
 
// 
//  In non-closure debug mode we do not allow temporaries for A or B. 
// 
#   ifndef FLENS_DEBUG_CLOSURES 
    static_assert(IsSame<RMA, typename Result<RMA>::Type>::value, 
                  "temporary required"); 
    static_assert(IsSame<RMB, typename Result<RMB>::Type>::value, 
                  "temporary required"); 
    ASSERT(transB==NoTrans); 
#   endif 
 
// 
//  If A_ or B_ is a closure temporaries get created 
// 
    FLENS_BLASLOG_TMP_TRON; 
    const RMA &A = A_.impl(); 
    const RMB &B = B_.impl(); 
    FLENS_BLASLOG_TMP_TROFF; 
 
// 
//  call (sy)mm 
// 
#   ifndef FLENS_DEBUG_CLOSURES 
    mm(side, alpha, A.impl(), B.impl(), beta, C.impl()); 
#   else 
// 
//  if transB is not NoTrans we need another temporary 
// 
    if (transB==NoTrans) { 
        mm(side, alpha, A, B, beta, C.impl()); 
    } else { 
        typename RMB::NoView B_; 
        FLENS_BLASLOG_TMP_ADD(B_); 
        copy(transB, B, B_); 
        mm(side, alpha, A, B_, beta, C.impl()); 
        FLENS_BLASLOG_TMP_REMOVE(B_, B); 
    } 
#   endif 
 
#   ifdef FLENS_DEBUG_CLOSURES 
    if (!IsSame<RMA, typename Result<RMA>::Type>::value) { 
        FLENS_BLASLOG_TMP_REMOVE(A, A_); 
    } 
    if (!IsSame<RMB, typename Result<RMB>::Type>::value) { 
        FLENS_BLASLOG_TMP_REMOVE(B, B_); 
    } 
#   endif 
} 
 
template <typename ALPHA, typename MA, typename MB, typename BETA, typename MC> 
void 
mm(Transpose DEBUG_VAR(transA), Transpose transB, const ALPHA &alpha, 
   const SymmetricMatrix<MA> &A, const GeneralMatrix<MB> &B, 
   const BETA &beta, Matrix<MC> &C) 
{ 
    ASSERT(transA==NoTrans || transA==Trans); 
    symm(Left, transB, alpha, A.impl(), B.impl(), beta, C.impl()); 
} 
 
template <typename ALPHA, typename MA, typename MB, typename BETA, typename MC> 
void 
mm(Transpose DEBUG_VAR(transA), Transpose transB, const ALPHA &alpha, 
   const GeneralMatrix<MA> &A, const SymmetricMatrix<MB> &B, 
   const BETA &beta, Matrix<MC> &C) 
{ 
    ASSERT(transA==NoTrans || transA==Trans); 
    symm(Right, transB, alpha, B.impl(), A.impl(), beta, C.impl()); 
} 
 
//== HermitianMatrix - GeneralMatrix products ================================== 
 
 
template <typename ALPHA, typename MA, typename MB, typename BETA, typename MC> 
void 
hemm(Side side, Transpose DEBUG_VAR(transB), const ALPHA &alpha, 
     const HermitianMatrix<MA> &A_, const GeneralMatrix<MB> &B_, 
     const BETA &beta, Matrix<MC> &C) 
{ 
    using namespace DEBUGCLOSURE; 
 
    typedef typename Result<typename MA::Impl>::Type  RMA; 
    typedef typename Result<typename MB::Impl>::Type  RMB; 
 
// 
//  In non-closure debug mode we do not allow temporaries for A or B. 
// 
#   ifndef FLENS_DEBUG_CLOSURES 
    static_assert(IsSame<RMA, typename Result<RMA>::Type>::value, 
                  "temporary required"); 
    static_assert(IsSame<RMB, typename Result<RMB>::Type>::value, 
                  "temporary required"); 
    ASSERT(transB==NoTrans); 
#   endif 
 
// 
//  If A_ or B_ is a closure temporaries get created 
// 
    FLENS_BLASLOG_TMP_TRON; 
    const RMA &A = A_.impl(); 
    const RMB &B = B_.impl(); 
    FLENS_BLASLOG_TMP_TROFF; 
 
// 
//  call (he)mm 
// 
#   ifndef FLENS_DEBUG_CLOSURES 
    mm(side, alpha, A.impl(), B.impl(), beta, C.impl()); 
#   else 
// 
//  if transB is not NoTrans we need another temporary 
// 
    if (transB==NoTrans) { 
        mm(side, alpha, A, B, beta, C.impl()); 
    } else { 
        typename RMB::NoView B_; 
        FLENS_BLASLOG_TMP_ADD(B_); 
        copy(transB, B, B_); 
        mm(side, alpha, A, B_, beta, C.impl()); 
        FLENS_BLASLOG_TMP_REMOVE(B_, B); 
    } 
#   endif 
 
#   ifdef FLENS_DEBUG_CLOSURES 
    if (!IsSame<RMA, typename Result<RMA>::Type>::value) { 
        FLENS_BLASLOG_TMP_REMOVE(A, A_); 
    } 
    if (!IsSame<RMB, typename Result<RMB>::Type>::value) { 
        FLENS_BLASLOG_TMP_REMOVE(B, B_); 
    } 
#   endif 
} 
 
 
template <typename ALPHA, typename MA, typename MB, typename BETA, typename MC> 
void 
mm(Transpose DEBUG_VAR(transA), Transpose transB, const ALPHA &alpha, 
   const HermitianMatrix<MA> &A, const GeneralMatrix<MB> &B, 
   const BETA &beta, Matrix<MC> &C) 
{ 
    ASSERT(transA==NoTrans || transA==Trans); 
    hemm(Left, transB, alpha, A.impl(), B.impl(), beta, C.impl()); 
} 
 
template <typename ALPHA, typename MA, typename MB, typename BETA, typename MC> 
void 
mm(Transpose DEBUG_VAR(transA), Transpose transB, const ALPHA &alpha, 
   const GeneralMatrix<MA> &A, const HermitianMatrix<MB> &B, 
   const BETA &beta, Matrix<MC> &C) 
{ 
    ASSERT(transA==NoTrans || transA==Trans); 
    hemm(Right, transB, alpha, B.impl(), A.impl(), beta, C.impl()); 
} 
 
 
 
 
 
 
//== Matrix - Matrix products ================================================== 
// 
//  This gets called if everything else fails 
// 
#ifdef FLENS_DEBUG_CLOSURES 
 
template <typename ALPHA, typename MA, typename MB, typename BETA, typename MC> 
void 
mm(Transpose transA, Transpose transB, const ALPHA &alpha, 
   const Matrix<MA> &A, const Matrix<MB> &B, 
   const BETA &beta, Matrix<MC> &C) 
{ 
    FLENS_BLASLOG_BEGIN_GEMM(transA, transB, alpha, A.impl(), 
                             B.impl(), beta, C.impl()); 
// 
//  We create temporaries of type GeMatrix for all matrices on the right 
//  hand side.  If A and B can be converted to GeMatrix types this at 
//  least does the desired compuation. 
// 
    typedef typename MA::Impl::ElementType        TA; 
    typedef typename MB::Impl::ElementType        TB; 
 
    typedef GeMatrix<FullStorage<TA, ColMajor> >  RMA; 
    typedef GeMatrix<FullStorage<TB, ColMajor> >  RMB; 
 
    FLENS_BLASLOG_TMP_TRON; 
    const RMA &A_ = A.impl(); 
    const RMB &B_ = B.impl(); 
    FLENS_BLASLOG_TMP_TROFF; 
 
    mm(transA, transB, alpha, A_, B_, beta, C.impl()); 
 
    FLENS_BLASLOG_TMP_REMOVE(A_, A); 
    FLENS_BLASLOG_TMP_REMOVE(B_, B); 
 
    FLENS_BLASLOG_END; 
} 
 
#endif 
 
} } // namespace blas, flens 
 
#endif // FLENS_BLAS_CLOSURES_LEVEL3_MM_TCC 
 
 |