#include <iostream>
#include <blaze/Math.h>
#include <cassert>
#include <chrono>
#include <cmath>
#include <limits>
#include <random>
#include "gemm.h"
template <typename T>
struct WallTime
{
void
tic()
{
t0 = std::chrono::high_resolution_clock::now();
}
T
toc()
{
using namespace std::chrono;
elapsed = high_resolution_clock::now() - t0;
return duration<T,seconds::period>(elapsed).count();
}
std::chrono::high_resolution_clock::time_point t0;
std::chrono::high_resolution_clock::duration elapsed;
};
// fill rectangular matrix with random values
template <typename MATRIX>
void
fill(MATRIX &A)
{
typedef typename MATRIX::ElementType T;
std::random_device random;
std::default_random_engine mt(random());
std::uniform_real_distribution<T> uniform(-100,100);
for (std::size_t i=0; i<(~A).rows(); ++i) {
for (std::size_t j=0; j<(~A).columns(); ++j) {
A(i,j) = uniform(mt);
}
}
}
template <typename MATRIX>
typename MATRIX::ElementType
asum(const MATRIX &A)
{
typedef typename MATRIX::ElementType T;
T asum = 0;
for (std::size_t i=0; i<A.rows(); ++i) {
for (std::size_t j=0; j<A.columns(); ++j) {
asum += std::abs(A(i,j));
}
}
return asum;
}
template <typename MA, typename MB, typename MC0, typename MC1>
double
estimateGemmResidual(const MA &A, const MB &B,
const MC0 &C0, const MC1 &C1)
{
typedef typename MC0::ElementType TC0;
std::size_t m= C1.rows();
std::size_t n= C1.columns();
std::size_t k= A.columns();
double aNorm = asum(A);
double bNorm = asum(B);
double cNorm = asum(C1);
double diff = asum(C1-C0);
// Using eps for double gives upper bound in case elements have lower
// precision.
double eps = std::numeric_limits<double>::epsilon();
double res = diff/(aNorm*bNorm*cNorm*eps*std::max(std::max(m,n),k));
return res;
}
#ifndef M_MAX
#define M_MAX 10000
#endif
#ifndef K_MAX
#define K_MAX 10000
#endif
#ifndef N_MAX
#define N_MAX 10000
#endif
#ifndef ALPHA
#define ALPHA 1
#endif
#ifndef BETA
#define BETA 0
#endif
#ifndef USE_SOA
#define USE_SOA blaze::rowMajor
#endif
#ifndef USE_SOB
#define USE_SOB blaze::rowMajor
#endif
#ifndef USE_SOC
#define USE_SOC blaze::rowMajor
#endif
int
main()
{
typedef double ElementType;
constexpr auto SOA = USE_SOA;
constexpr auto SOB = USE_SOB;
constexpr auto SOC = USE_SOC;
ElementType alpha = ALPHA;
ElementType beta = ALPHA;
const std::size_t m_min = 100;
const std::size_t k_min = 100;
const std::size_t n_min = 100;
const std::size_t m_max = M_MAX;
const std::size_t k_max = K_MAX;
const std::size_t n_max = N_MAX;
const std::size_t m_inc = 100;
const std::size_t k_inc = 100;
const std::size_t n_inc = 100;
std::cout << "# m";
std::cout << " n";
std::cout << " k";
std::cout << " C++GEMM t1";
std::cout << " MFLOPS";
std::cout << " BLAZE/BLAS t2";
std::cout << " MFLOPS";
std::cout << " Residual ";
std::cout << std::endl;
WallTime<double> walltime;
for (std::size_t m=m_min, k=k_min, n=n_min;
m<=m_max && k<=k_max && n<=n_max;
m += m_inc, k += k_inc, n += n_inc)
{
blaze::DynamicMatrix<double, SOA> A(m, k);
blaze::DynamicMatrix<double, SOB> B(k, n);
blaze::DynamicMatrix<double, SOC> C1(m, n);
blaze::DynamicMatrix<double, SOC> C2(m, n);
fill(A);
fill(B);
fill(C1);
C2 = C1;
walltime.tic();
foo::gemm(alpha, A, B, beta, C1);
double t1 = walltime.toc();
walltime.tic();
#if BLAZE_BLAS_MODE==1
blaze::dgemm(C2, A, B, alpha, beta);
#else
if (beta==ElementType(0)) {
C2 = alpha*A*B;
} else if (beta==ElementType(1)) {
C2 = C2 + alpha*A*B;
} else {
C2 = beta*C2;
C2 + alpha*A*B;
}
#endif
double t2 = walltime.toc();
double res = estimateGemmResidual(A, B, C1, C2);
std::cout.width(5); std::cout << m << " ";
std::cout.width(5); std::cout << n << " ";
std::cout.width(5); std::cout << k << " ";
std::cout.width(12); std::cout << t1 << " ";
std::cout.width(12); std::cout << 2.*m/1000.*n/1000.*k/t1 << " ";
std::cout.width(15); std::cout << t2 << " ";
std::cout.width(12); std::cout << 2.*m/1000.*n/1000.*k/t2 << " ";
std::cout.width(15); std::cout << res;
std::cout << std::endl;
}
}