1 
  2 
  3 
  4 
  5 
  6 
  7 
  8 
  9 
 10 
 11 
 12 
 13 
 14 
 15 
 16 
 17 
 18 
 19 
 20 
 21 
 22 
 23 
 24 
 25 
 26 
 27 
 28 
 29 
 30 
 31 
 32 
 33 
 34 
 35 
 36 
 37 
 38 
 39 
 40 
 41 
 42 
 43 
 44 
 45 
 46 
 47 
 48 
 49 
 50 
 51 
 52 
 53 
 54 
 55 
 56 
 57 
 58 
 59 
 60 
 61 
 62 
 63 
 64 
 65 
 66 
 67 
 68 
 69 
 70 
 71 
 72 
 73 
 74 
 75 
 76 
 77 
 78 
 79 
 80 
 81 
 82 
 83 
 84 
 85 
 86 
 87 
 88 
 89 
 90 
 91 
 92 
 93 
 94 
 95 
 96 
 97 
 98 
 99 
100 
101 
102 
103 
104 
105 
106 
107 
108 
109 
110 
111 
112 
113 
114 
 
 | 
 
/* 
 * Copyright (C) 2014, The University of Texas at Austin 
 * 
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions are 
 * met: 
 *  - Redistributions of source code must retain the above copyright 
 *    notice, this list of conditions and the following disclaimer. 
 *  - Redistributions in binary form must reproduce the above copyright 
 *    notice, this list of conditions and the following disclaimer in the 
 *    documentation and/or other materials provided with the distribution. 
 *  - Neither the name of The University of Texas at Austin nor the names 
 *    of its contributors may be used to endorse or promote products 
 *    derived from this software without specific prior written permission. 
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 * 
 */ 
 
/* 
 * Copyright (C) 2014-2015, Michael Lehn 
 * 
 * ulmBLAS adopted general ideas from BLIS.  Using micro kernels from BLIS 
 * only requires minor modifications, 
 * 
 */ 
 
#ifndef ULMBLAS_IMPL_LEVEL3_MKERNEL_MTRUMM_TCC 
#define ULMBLAS_IMPL_LEVEL3_MKERNEL_MTRUMM_TCC 1 
 
#include <ulmblas/impl/level3/ukernel/ugemm.h> 
#include <ulmblas/impl/level3/mkernel/mtrumm.h> 
 
namespace ulmBLAS { 
 
template <typename IndexType, typename Alpha, typename T, typename TB> 
void 
mtrumm(IndexType    mc, 
       IndexType    nc, 
       const Alpha  &alpha, 
       const T      *A_, 
       const T      *B_, 
       TB           *B, 
       IndexType    incRowB, 
       IndexType    incColB) 
{ 
    const IndexType MR = BlockSizeUGemm<T>::MR; 
    const IndexType NR = BlockSizeUGemm<T>::NR; 
 
    const IndexType mp = (mc+MR-1) / MR; 
    const IndexType np = (nc+NR-1) / NR; 
 
    const IndexType mr_ = mc % MR; 
    const IndexType nr_ = nc % NR; 
 
    IndexType mr, nr; 
    IndexType kc; 
 
    const T Zero(0); 
 
    const T *nextA; 
    const T *nextB; 
 
    for (IndexType j=0; j<np; ++j) { 
        nr    = (j!=np-1 || nr_==0) ? NR : nr_; 
        nextB = &B_[j*mc*NR]; 
 
 
        IndexType ia = 0; 
        for (IndexType i=0; i<mp; ++i) { 
            mr    = (i!=mp-1 || mr_==0) ? MR : mr_; 
            kc    = std::max(mc - i*MR, IndexType(0)); 
            nextA = &A_[(ia+1)*MR]; 
 
            if (i==mp-1) { 
                nextA = A_; 
                nextB = &B_[(j+1)*mc*NR]; 
                if (j==np-1) { 
                    nextB = B_; 
                } 
            } 
 
            if (mr==MR && nr==NR) { 
                ugemm(kc, 
                      alpha, &A_[ia*MR], &B_[j*mc*NR+i*MR*NR], 
                      Zero, 
                      &B[i*MR*incRowB+j*NR*incColB], incRowB, incColB, 
                      nextA, nextB); 
            } else { 
                // Call the buffered micro kernel 
                ugemm(mr, nr, kc, 
                      alpha, &A_[ia*MR], &B_[j*mc*NR+i*MR*NR], 
                      Zero, 
                      &B[i*MR*incRowB+j*NR*incColB], incRowB, incColB, 
                      nextA, nextB); 
            } 
            ia += kc; 
        } 
    } 
} 
 
} // namespace ulmBLAS 
 
#endif // ULMBLAS_IMPL_LEVEL3_MKERNEL_MTRUMM_TCC 
 
 |