1
      2
      3
      4
      5
      6
      7
      8
      9
     10
     11
     12
     13
     14
     15
     16
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31
     32
     33
     34
     35
     36
     37
     38
     39
     40
     41
     42
     43
     44
     45
     46
     47
     48
     49
     50
     51
     52
     53
     54
     55
     56
     57
     58
     59
     60
     61
     62
     63
     64
     65
     66
     67
     68
     69
     70
     71
     72
     73
     74
     75
#include <cstdio>
#include <cassert>
#include <chrono>
#include <complex>
#include <cmath>
#include <limits>
#include <random>
#include "common.h"

int
main()
{
    typedef flens::TrMatrix<flens::FullStorage<TYPE_A> >       TrMatrixA;
    typedef flens::GeMatrix<flens::FullStorage<TYPE_B> >       GeMatrixB;

    TYPE_ALPHA   alpha = ALPHA;

    const std::size_t min_m = MIN_M;
    const std::size_t min_n = MIN_N;

    const std::size_t max_m = MAX_M;
    const std::size_t max_n = MAX_N;

    const std::size_t inc_m = INC_M;
    const std::size_t inc_n = INC_N;

    std::printf("#%5s %5s ", "m", "n");
    std::printf("%20s %9s", "FLENS/" BLAS_LIB ": t", "MFLOPS");
    std::printf("%20s %9s %9s", "ulmBLAS: t", "MFLOPS", "Residual");
    std::printf(" %10s", "t2/t1*100");
    std::printf("\n");

    WallTime<double>  walltime;

    for (std::size_t m=min_m, n=min_n;
         m<=max_m && n<=max_n;
         m += inc_m, n += inc_n)
    {
        TrMatrixA   A(m, flens::Lower, flens::Unit);
        GeMatrixB   B1(m, n);
        GeMatrixB   B2(m, n);

        fill(A);
        fill(B1);
        B2 = B1;

        walltime.tic();
        flens::blas::sm(flens::Left, flens::NoTrans, alpha, A, B1);
        double t1 = walltime.toc();


        walltime.tic();
        cxxblas::internal::trsm(true,
                                B2.numRows(), B2.numCols(),
                                alpha,
                                true, false, false, true,
                                A.data(), A.strideRow(), A.strideCol(),
                                B2.data(), B2.strideRow(), B2.strideCol());
        double t2 = walltime.toc();

        double res = asumDiff(B1, B2);

        double mflops = (n * m * (m + 1.0 ) / 2.0)
                      + (n * m * (m - 1.0 ) / 2.0);
        mflops /= 1000000;

        std::printf(" %5ld %5ld ", m, n);
        std::printf("%20.4lf %9.2lf", t1, mflops/t1);
        std::printf("%20.4lf %9.2lf", t2, mflops/t2);
        std::printf(" %9.1e", res);
        std::printf(" %10.2lf", t2/t1*100);
        std::printf("\n");
    }

}