#ifndef HPC_ULMBLAS_PACK_H #define HPC_ULMBLAS_PACK_H 1 #include namespace hpc { namespace ulmblas { template void pack_A(Index mc, Index kc, const TA *A, Index incRowA, Index incColA, T *p) { Index MR = BlockSize::MR; Index mp = (mc+MR-1) / MR; for (Index j=0; j void pack_B(Index kc, Index nc, const TB *B, Index incRowB, Index incColB, T *p) { Index NR = BlockSize::NR; Index np = (nc+NR-1) / NR; for (Index l=0; l