| 
 
/* 
 * Copyright (C) 2014, The University of Texas at Austin 
 * 
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions are 
 * met: 
 *  - Redistributions of source code must retain the above copyright 
 *    notice, this list of conditions and the following disclaimer. 
 *  - Redistributions in binary form must reproduce the above copyright 
 *    notice, this list of conditions and the following disclaimer in the 
 *    documentation and/or other materials provided with the distribution. 
 *  - Neither the name of The University of Texas at Austin nor the names 
 *    of its contributors may be used to endorse or promote products 
 *    derived from this software without specific prior written permission. 
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 * 
 */ 
 
/* 
 * Copyright (C) 2014-2015, Michael Lehn 
 * 
 * ulmBLAS adopted general ideas from BLIS.  Using micro kernels from BLIS
 * only requires minor modifications.
 * 
 */ 
 
#ifndef ULMBLAS_IMPL_LEVEL3_UKERNEL_REF_UGEMM_TCC 
#define ULMBLAS_IMPL_LEVEL3_UKERNEL_REF_UGEMM_TCC 1 
 
#include <ulmblas/impl/level3/ukernel/ref/ugemm.h> 
 
namespace ulmBLAS { namespace ref { 
 
template <typename IndexType, typename T> 
void 
ugemm(IndexType   kc, 
      const T     &alpha, 
      const T     *A, 
      const T     *B, 
      const T     &beta, 
      T           *C, 
      IndexType   incRowC, 
      IndexType   incColC, 
      const T     *, 
      const T     *) 
{ 
    const IndexType MR = BlockSizeUGemm<T>::MR; 
    const IndexType NR = BlockSizeUGemm<T>::NR; 
 
    T AB[MR*NR]; 
 
    for (IndexType i=0; i<MR*NR; ++i) { 
        AB[i] = T(0); 
    } 
 
    for (IndexType l=0; l<kc; ++l) { 
        for (IndexType j=0; j<NR; ++j) { 
            for (IndexType i=0; i<MR; ++i) { 
                AB[i+j*MR] += A[i]*B[j]; 
            } 
        } 
        A += MR; 
        B += NR; 
    } 
 
    if (beta==T(0)) { 
        for (IndexType j=0; j<NR; ++j) { 
            for (IndexType i=0; i<MR; ++i) { 
                C[i*incRowC+j*incColC] = T(0); 
            } 
        } 
    } else { 
        for (IndexType j=0; j<NR; ++j) { 
            for (IndexType i=0; i<MR; ++i) { 
                C[i*incRowC+j*incColC] *= beta; 
            } 
        } 
    } 
 
    if (alpha==T(1)) { 
        for (IndexType j=0; j<NR; ++j) { 
            for (IndexType i=0; i<MR; ++i) { 
                C[i*incRowC+j*incColC] += AB[i+j*MR]; 
            } 
        } 
    } else { 
        for (IndexType j=0; j<NR; ++j) { 
            for (IndexType i=0; i<MR; ++i) { 
                C[i*incRowC+j*incColC] += alpha*AB[i+j*MR]; 
            } 
        } 
    } 
} 
 
} } // namespace ref, ulmBLAS 
 
#endif // ULMBLAS_IMPL_LEVEL3_UKERNEL_REF_UGEMM_TCC 1 
 
 |