#ifndef HPC_ULMBLAS_KERNELS_UGEMM_BUF_H #define HPC_ULMBLAS_KERNELS_UGEMM_BUF_H 1 #include namespace hpc { namespace ulmblas { template void ugemm(Index kc, T alpha, const T *A, const T *B, Beta beta, TC *C, Index incRowC, Index incColC) { const Index MR = BlockSize::MR; const Index NR = BlockSize::NR; T P[BlockSize::MR*BlockSize::NR]; std::fill_n(P, MR*NR, T(0)); ugemm(kc, alpha, A, B, T(0), P, Index(1), MR); gescal(MR, NR, beta, C, incRowC, incColC); geaxpy(MR, NR, T(1), P, Index(1), MR, C, incRowC, incColC); } } } // namespace ulmblas, hpc #endif // HPC_ULMBLAS_KERNELS_UGEMM_BUF_H