#ifndef HPC_ULMBLAS_KERNELS_UGEMM_REF_H #define HPC_ULMBLAS_KERNELS_UGEMM_REF_H 1 #include namespace hpc { namespace ulmblas { template void ugemm(Index kc, T alpha, const T *A, const T *B, T beta, T *C, Index incRowC, Index incColC) { const Index MR = BlockSize::MR; const Index NR = BlockSize::NR; T P[BlockSize::MR*BlockSize::NR]; for (Index l=0; l