#include <cstdio> #include <cassert> #include <chrono> #include <complex> #include <cmath> #include <limits> #include <random> #include <flens/flens.cxx> #include "common.h" int main() { typedef flens::GeMatrix<flens::FullStorage<TYPE_A> > GeMatrixA; typedef flens::GeMatrix<flens::FullStorage<TYPE_B> > GeMatrixB; typedef flens::GeMatrix<flens::FullStorage<TYPE_C> > GeMatrixC; TYPE_ALPHA alpha = ALPHA; TYPE_BETA beta = BETA; const std::size_t min_m = MIN_M; const std::size_t min_n = MIN_N; const std::size_t min_k = MIN_K; const std::size_t max_m = MAX_M; const std::size_t max_n = MAX_N; const std::size_t max_k = MAX_K; const std::size_t inc_m = INC_M; const std::size_t inc_n = INC_N; const std::size_t inc_k = INC_K; std::printf("#%5s %5s %5s ", "m", "n", "k"); std::printf("%20s %9s", "FLENS/ulmBLAS: t", "MFLOPS"); std::printf("%20s %9s %9s", BLAS_LIB ": t", "MFLOPS", "Residual"); std::printf(" %10s", "t2/t1*100"); std::printf("\n"); WallTime<double> walltime; for (std::size_t m=min_m, k=min_k, n=min_n; m<=max_m && k<=max_k && n<=max_n; m += inc_m, k += inc_k, n += inc_n) { GeMatrixA A(m, k); GeMatrixB B(k, n); GeMatrixC C1(m, n); GeMatrixC C2(m, n); fill(A); fill(B); fill(C1); C2 = C1; walltime.tic(); flens::blas::mm(flens::NoTrans, flens::NoTrans, alpha, A, B, beta, C1); double t1 = walltime.toc(); walltime.tic(); cxxblas::internal::gemm(C2.numRows(), C2.numCols(), A.numCols(), alpha, false, false, A.data(), A.strideRow(), A.strideCol(), false, false, B.data(), B.strideRow(), B.strideCol(), beta, C2.data(), C2.strideRow(), C2.strideCol()); double t2 = walltime.toc(); double res = estimateGemmResidual(A, B, C1, C2); std::printf(" %5ld %5ld %5ld ", m, n, k); std::printf("%20.4lf %9.2lf", t1, 2.*m/1000.*n/1000.*k/t1); std::printf("%20.4lf %9.2lf", t2, 2.*m/1000.*n/1000.*k/t2); std::printf(" %9.1e", res); std::printf(" %10.2lf", t2/t1*100); std::printf("\n"); } } |