#ifndef HPC_ULMBLAS_MGEMM_H #define HPC_ULMBLAS_MGEMM_H 1 #include #include #include #include #include namespace hpc { namespace ulmblas { template void mgemm(Index mc, Index nc, Index kc, T alpha, const T *A, const T *B, Beta beta, TC *C, Index incRowC, Index incColC) { Index MR = BlockSize::MR; Index NR = BlockSize::NR; T C_[BlockSize::MR*BlockSize::NR]; Index mp = (mc+MR-1) / MR; Index np = (nc+NR-1) / NR; Index mr_ = mc % MR; Index nr_ = nc % NR; for (Index j=0; j