#include <cstdio>
#include <cassert>
#include <chrono>
#include <complex>
#include <cmath>
#include <limits>
#include <random>
#include <flens/flens.cxx>
#include "common.h"
#define BLASINT int64_t
//------------------------------------------------------------------------------
extern "C" {
void
sgemm_(const char *transA, const char *transB,
const BLASINT *M, const BLASINT *N, const BLASINT *K,
const float *alpha,
const float *A, const BLASINT *LDA,
const float *B, const BLASINT *LDB,
const float *beta,
float *C, const BLASINT *LDC);
void
dgemm_(const char *transA, const char *transB,
const BLASINT *M, const BLASINT *N, const BLASINT *K,
const double *alpha,
const double *A, const BLASINT *LDA,
const double *B, const BLASINT *LDB,
const double *beta,
double *C, const BLASINT *LDC);
void
cgemm_(const char *transA, const char *transB,
const BLASINT *M, const BLASINT *N, const BLASINT *K,
const float *alpha,
const float *A, const BLASINT *LDA,
const float *B, const BLASINT *LDB,
const float *beta,
float *C, const BLASINT *LDC);
void
zgemm_(const char *transA, const char *transB,
const BLASINT *M, const BLASINT *N, const BLASINT *K,
const double *alpha,
const double *A, const BLASINT *LDA,
const double *B, const BLASINT *LDB,
const double *beta,
double *C, const BLASINT *LDC);
} // extern "C"
//------------------------------------------------------------------------------
template <typename Index>
void
f77_gemm(char transA, char transB,
Index m, Index n, Index k,
float alpha,
const float *A, Index ldA,
const float *B, Index ldB,
float beta,
float *C, Index ldC)
{
BLASINT M = m;
BLASINT N = n;
BLASINT K = k;
BLASINT LDA = ldA;
BLASINT LDB = ldB;
BLASINT LDC = ldC;
sgemm_(&transA, &transB,
&M, &N, &K,
&alpha,
A, &LDA,
B, &LDB,
&beta,
C, &LDC);
}
template <typename Index>
void
f77_gemm(char transA, char transB,
Index m, Index n, Index k,
double alpha,
const double *A, Index ldA,
const double *B, Index ldB,
double beta,
double *C, Index ldC)
{
BLASINT M = m;
BLASINT N = n;
BLASINT K = k;
BLASINT LDA = ldA;
BLASINT LDB = ldB;
BLASINT LDC = ldC;
dgemm_(&transA, &transB,
&M, &N, &K,
&alpha,
A, &LDA,
B, &LDB,
&beta,
C, &LDC);
}
template <typename Index>
void
f77_gemm(char transA, char transB,
Index m, Index n, Index k,
std::complex<float> alpha,
const std::complex<float> *A, Index ldA,
const std::complex<float> *B, Index ldB,
std::complex<float> beta,
std::complex<float> *C, Index ldC)
{
BLASINT M = m;
BLASINT N = n;
BLASINT K = k;
BLASINT LDA = ldA;
BLASINT LDB = ldB;
BLASINT LDC = ldC;
cgemm_(&transA, &transB,
&M, &N, &K,
(const float*) &alpha,
(const float*) A, &LDA,
(const float*) B, &LDB,
(const float*) &beta,
(float*) C, &LDC);
}
template <typename Index>
void
f77_gemm(char transA, char transB,
Index m, Index n, Index k,
std::complex<double> alpha,
const std::complex<double> *A, Index ldA,
const std::complex<double> *B, Index ldB,
std::complex<double> beta,
std::complex<double> *C, Index ldC)
{
BLASINT M = m;
BLASINT N = n;
BLASINT K = k;
BLASINT LDA = ldA;
BLASINT LDB = ldB;
BLASINT LDC = ldC;
zgemm_(&transA, &transB,
&M, &N, &K,
(const double*) &alpha,
(const double*) A, &LDA,
(const double*) B, &LDB,
(const double*) &beta,
(double*) C, &LDC);
}
//------------------------------------------------------------------------------
int
main()
{
typedef flens::GeMatrix<flens::FullStorage<TYPE_A> > GeMatrixA;
typedef flens::GeMatrix<flens::FullStorage<TYPE_B> > GeMatrixB;
typedef flens::GeMatrix<flens::FullStorage<TYPE_C> > GeMatrixC;
TYPE_ALPHA alpha = ALPHA;
TYPE_BETA beta = BETA;
const std::size_t min_m = MIN_M;
const std::size_t min_n = MIN_N;
const std::size_t min_k = MIN_K;
const std::size_t max_m = MAX_M;
const std::size_t max_n = MAX_N;
const std::size_t max_k = MAX_K;
const std::size_t inc_m = INC_M;
const std::size_t inc_n = INC_N;
const std::size_t inc_k = INC_K;
std::printf("#%5s %5s %5s ", "m", "n", "k");
std::printf("%20s %9s", "FLENS/ulmBLAS: t", "MFLOPS");
std::printf("%20s %9s %9s", BLAS_LIB ": t", "MFLOPS", "Residual");
std::printf("\n");
WallTime<double> walltime;
for (std::size_t m=min_m, k=min_k, n=min_n;
m<=max_m && k<=max_k && n<=max_n;
m += inc_m, k += inc_k, n += inc_n)
{
GeMatrixA A(m, k);
GeMatrixB B(k, n);
GeMatrixC C1(m, n);
GeMatrixC C2(m, n);
fill(A);
fill(B);
fill(C1);
C2 = C1;
walltime.tic();
flens::blas::mm(flens::NoTrans, flens::NoTrans, alpha, A, B, beta, C1);
double t1 = walltime.toc();
walltime.tic();
f77_gemm('N', 'N',
C2.numRows(), C2.numCols(), A.numCols(),
alpha,
A.data(), A.leadingDimension(),
B.data(), B.leadingDimension(),
beta,
C2.data(), C2.leadingDimension());
double t2 = walltime.toc();
double res = estimateGemmResidual(A, B, C1, C2);
std::printf(" %5ld %5ld %5ld ", m, n, k);
std::printf("%20.4lf %9.2lf", t1, 2.*m/1000.*n/1000.*k/t1);
std::printf("%20.4lf %9.2lf", t2, 2.*m/1000.*n/1000.*k/t2);
std::printf(" %9.1e", res);
std::printf("\n");
}
}