#include <cstddef>
#include <cstdio>
#include "bench.h"
#include "ulmblas.h"
#include "gemm_refcolmajor.h"
#include "gemm_blocked.h"
#include "gematrix.h"
#ifndef COLMAJOR
#define COLMAJOR 1
#endif
#ifndef MAXDIM_M
#define MAXDIM_M 7000
#endif
#ifndef MAXDIM_N
#define MAXDIM_N 7000
#endif
#ifndef MAXDIM_K
#define MAXDIM_K 7000
#endif
#ifndef MIN_M
#define MIN_M 100
#endif
#ifndef MIN_N
#define MIN_N 100
#endif
#ifndef MIN_K
#define MIN_K 100
#endif
#ifndef MAX_M
#define MAX_M 7000
#endif
#ifndef MAX_N
#define MAX_N 7000
#endif
#ifndef MAX_K
#define MAX_K 7000
#endif
#ifndef INC_M
#define INC_M 100
#endif
#ifndef INC_N
#define INC_N 100
#endif
#ifndef INC_K
#define INC_K 100
#endif
#ifndef ALPHA
#define ALPHA 1.5
#endif
#ifndef BETA
#define BETA 1.5
#endif
int
main()
{
using namespace matvec;
typedef double T;
StorageOrder order = (COLMAJOR) ? StorageOrder::ColMajor
: StorageOrder::RowMajor;
GeMatrix<T> A_(MAXDIM_M, MAXDIM_K, order);
GeMatrix<T> B_(MAXDIM_K, MAXDIM_N, order);
GeMatrix<T> C1_(MAXDIM_M, MAXDIM_N, order);
GeMatrix<T> C2_(MAXDIM_M, MAXDIM_N, order);
initGeMatrix(A_);
initGeMatrix(B_);
initGeMatrix(C1_);
gecopy(C1_, C2_);
// Header-Zeile fuer die Ausgabe
printf("%5s %5s %5s ", "m", "n", "k");
printf("%20s %9s", "refColMajor: t", "MFLOPS");
printf("%20s %9s %9s", "blocked GEMM: t", "MFLOPS", "diff");
printf("\n");
bench::WallTime<double> wallTime;
for (long m = MIN_M, n = MIN_N, k = MIN_K;
m <=MAX_M && n <= MAX_N && k <= MAX_K;
m += INC_M, n += INC_N, k += INC_K)
{
double t, diff;
GeMatrixView<T> A = A_(0,0,m,k);
GeMatrixView<T> B = B_(0,0,k,n);
GeMatrixView<T> C1 = C1_(0,0,m,n);
GeMatrixView<T> C2 = C2_(0,0,m,n);
printf("%5ld %5ld %5ld ", m, n, k);
wallTime.tic();
gemm_ref(ALPHA, A, B, BETA, C1);
t = wallTime.toc();
printf("%20.4lf %9.2lf", t, 2.*m/1000*n/1000*k/t);
wallTime.tic();
gemm_blk(ALPHA, A, B, BETA, C2);
t = wallTime.toc();
diff = asumDiffGeMatrix(C1, C2);
printf("%20.4lf %9.2lf %9.1e", t, 2.*m/1000*n/1000*k/t, diff);
printf("\n");
}
}