#ifndef HPC_ULMBLAS_GEMM_H #define HPC_ULMBLAS_GEMM_H 1 #include #include #include #include #include #include #include #include namespace hpc { namespace ulmblas { template void gemm(Index m, Index n, Index k, Alpha alpha, const TA *A, Index incRowA, Index incColA, const TB *B, Index incRowB, Index incColB, Beta beta, TC *C, Index incRowC, Index incColC) { typedef typename std::common_type::type T; const Index MC = BlockSize::MC; const Index NC = BlockSize::NC; const Index KC = BlockSize::KC; const Index mb = (m+MC-1) / MC; const Index nb = (n+NC-1) / NC; const Index kb = (k+KC-1) / KC; const Index mc_ = m % MC; const Index nc_ = n % NC; const Index kc_ = k % KC; T *A_ = new T[MC*KC]; T *B_ = new T[KC*NC]; if (alpha==Alpha(0) || k==0) { gescal(m, n, beta, C, incRowC, incColC); return; } for (Index j=0; j