1
      2
      3
      4
      5
      6
      7
      8
      9
     10
     11
     12
     13
     14
     15
     16
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31
     32
     33
     34
     35
     36
     37
     38
     39
     40
     41
     42
     43
     44
     45
     46
     47
     48
     49
     50
     51
     52
     53
     54
     55
     56
     57
     58
     59
     60
     61
     62
     63
     64
     65
     66
     67
     68
     69
     70
     71
     72
     73
     74
     75
     76
     77
     78
     79
#include <cstdio>
#include <cassert>
#include <chrono>
#include <complex>
#include <cmath>
#include <limits>
#include <random>
#include <flens/flens.cxx>
#include "common.h"

int
main()
{
    typedef flens::GeMatrix<flens::FullStorage<TYPE_A> >       GeMatrixA;
    typedef flens::GeMatrix<flens::FullStorage<TYPE_B> >       GeMatrixB;
    typedef flens::GeMatrix<flens::FullStorage<TYPE_C> >       GeMatrixC;

    TYPE_ALPHA   alpha = ALPHA;
    TYPE_BETA    beta  = BETA;

    const std::size_t min_m = MIN_M;
    const std::size_t min_n = MIN_N;
    const std::size_t min_k = MIN_K;

    const std::size_t max_m = MAX_M;
    const std::size_t max_n = MAX_N;
    const std::size_t max_k = MAX_K;

    const std::size_t inc_m = INC_M;
    const std::size_t inc_n = INC_N;
    const std::size_t inc_k = INC_K;

    std::printf("#%5s %5s %5s ", "m", "n", "k");
    std::printf("%20s %9s", "FLENS/ulmBLAS: t", "MFLOPS");
    std::printf("%20s %9s %9s", BLAS_LIB  ":  t", "MFLOPS", "Residual");
    std::printf(" %10s", "t2/t1*100");
    std::printf("\n");

    WallTime<double>  walltime;

    for (std::size_t m=min_m, k=min_k, n=min_n;
         m<=max_m && k<=max_k && n<=max_n;
         m += inc_m, k += inc_k, n += inc_n)
    {
        GeMatrixA   A(m, k);
        GeMatrixB   B(k, n);
        GeMatrixC   C1(m, n);
        GeMatrixC   C2(m, n);

        fill(A);
        fill(B);
        fill(C1);
        C2 = C1;

        walltime.tic();
        flens::blas::mm(flens::NoTrans, flens::NoTrans, alpha, A, B, beta, C1);
        double t1 = walltime.toc();


        walltime.tic();
        cxxblas::internal::gemm(C2.numRows(), C2.numCols(), A.numCols(),
                                alpha,
                                false, false, A.data(), A.strideRow(), A.strideCol(),
                                false, false, B.data(), B.strideRow(), B.strideCol(),
                                beta,
                                C2.data(), C2.strideRow(), C2.strideCol());
        double t2 = walltime.toc();

        double res = estimateGemmResidual(A, B, C1, C2);

        std::printf(" %5ld %5ld %5ld ", m, n, k);
        std::printf("%20.4lf %9.2lf", t1, 2.*m/1000.*n/1000.*k/t1);
        std::printf("%20.4lf %9.2lf", t2, 2.*m/1000.*n/1000.*k/t2);
        std::printf(" %9.1e", res);
        std::printf(" %10.2lf", t2/t1*100);
        std::printf("\n");
    }

}