#include <cstdio> #include <cassert> #include <chrono> #include <complex> #include <cmath> #include <limits> #include <random> #include "common.h" int main() { typedef flens::TrMatrix<flens::FullStorage<TYPE_A> > TrMatrixA; typedef flens::GeMatrix<flens::FullStorage<TYPE_B> > GeMatrixB; TYPE_ALPHA alpha = ALPHA; const std::size_t min_m = MIN_M; const std::size_t min_n = MIN_N; const std::size_t max_m = MAX_M; const std::size_t max_n = MAX_N; const std::size_t inc_m = INC_M; const std::size_t inc_n = INC_N; std::printf("#%5s %5s ", "m", "n"); std::printf("%20s %9s", "FLENS/" BLAS_LIB ": t", "MFLOPS"); std::printf("%20s %9s %9s", "ulmBLAS: t", "MFLOPS", "Residual"); std::printf(" %10s", "t2/t1*100"); std::printf("\n"); WallTime<double> walltime; for (std::size_t m=min_m, n=min_n; m<=max_m && n<=max_n; m += inc_m, n += inc_n) { TrMatrixA A(m, flens::Lower, flens::Unit); GeMatrixB B1(m, n); GeMatrixB B2(m, n); fill(A); fill(B1); B2 = B1; walltime.tic(); flens::blas::sm(flens::Left, flens::NoTrans, alpha, A, B1); double t1 = walltime.toc(); walltime.tic(); cxxblas::internal::trsm(true, B2.numRows(), B2.numCols(), alpha, true, false, false, true, A.data(), A.strideRow(), A.strideCol(), B2.data(), B2.strideRow(), B2.strideCol()); double t2 = walltime.toc(); double res = asumDiff(B1, B2); double mflops = (n * m * (m + 1.0 ) / 2.0) + (n * m * (m - 1.0 ) / 2.0); mflops /= 1000000; std::printf(" %5ld %5ld ", m, n); std::printf("%20.4lf %9.2lf", t1, mflops/t1); std::printf("%20.4lf %9.2lf", t2, mflops/t2); std::printf(" %9.1e", res); std::printf(" %10.2lf", t2/t1*100); std::printf("\n"); } } |