#include <cstdio>
#include <cassert>
#include <chrono>
#include <complex>
#include <cmath>
#include <limits>
#include <random>
#include <flens/flens.cxx>
#ifndef MAX_N
#define MAX_N 4000
#endif
#ifndef MAX_M
#define MAX_M 4000
#endif
#include "common.h"
#define BLASINT int64_t
//------------------------------------------------------------------------------
extern "C" {
void
sgemv_(const char *trans,
const BLASINT *M, const BLASINT *N,
const float *alpha,
const float *A, const BLASINT *ldA,
const float *x, const BLASINT *incx,
const float *beta,
float *y, const BLASINT *incy);
void
dgemv_(const char *trans,
const BLASINT *M, const BLASINT *N,
const double *alpha,
const double *A, const BLASINT *ldA,
const double *x, const BLASINT *incx,
const double *beta,
double *y, const BLASINT *incy);
void
cgemv_(const char *trans,
const BLASINT *M, const BLASINT *N,
const float *alpha,
const float *A, const BLASINT *ldA,
const float *x, const BLASINT *incx,
const float *beta,
float *y, const BLASINT *incy);
void
zgemv_(const char *trans,
const BLASINT *M, const BLASINT *N,
const double *alpha,
const double *A, const BLASINT *ldA,
const double *x, const BLASINT *incx,
const double *beta,
double *y, const BLASINT *incy);
} // extern "C"
//------------------------------------------------------------------------------
template <typename Index>
void
f77_gemv(char transA,
Index m, Index n,
float alpha,
const float *A, Index ldA,
const float *X, Index incX,
float beta,
float *Y, Index incY)
{
BLASINT M = m;
BLASINT N = n;
BLASINT LDA = ldA;
BLASINT INCX = incX;
BLASINT INCY = incY;
sgemv_(&transA,
&M, &N,
&alpha,
A, &LDA,
X, &INCX,
&beta,
Y, &INCY);
}
template <typename Index>
void
f77_gemv(char transA,
Index m, Index n,
double alpha,
const double *A, Index ldA,
const double *X, Index incX,
double beta,
double *Y, Index incY)
{
BLASINT M = m;
BLASINT N = n;
BLASINT LDA = ldA;
BLASINT INCX = incX;
BLASINT INCY = incY;
dgemv_(&transA,
&M, &N,
&alpha,
A, &LDA,
X, &INCX,
&beta,
Y, &INCY);
}
template <typename Index>
void
f77_gemv(char transA,
Index m, Index n,
const std::complex<float> &alpha,
const std::complex<float> *A, Index ldA,
const std::complex<float> *X, Index incX,
const std::complex<float> &beta,
std::complex<float> *Y, Index incY)
{
BLASINT M = m;
BLASINT N = n;
BLASINT LDA = ldA;
BLASINT INCX = incX;
BLASINT INCY = incY;
cgemv_(&transA,
&M, &N,
(const float *)&alpha,
(const float *)A, &LDA,
(const float *)X, &INCX,
(const float *)&beta,
(float *)Y, &INCY);
}
template <typename Index>
void
f77_gemv(char transA,
Index m, Index n,
const std::complex<double> &alpha,
const std::complex<double> *A, Index ldA,
const std::complex<double> *X, Index incX,
const std::complex<double> &beta,
std::complex<double> *Y, Index incY)
{
BLASINT M = m;
BLASINT N = n;
BLASINT LDA = ldA;
BLASINT INCX = incX;
BLASINT INCY = incY;
zgemv_(&transA,
&M, &N,
(const double *)&alpha,
(const double *)A, &LDA,
(const double *)X, &INCX,
(const double *)&beta,
(double *)Y, &INCY);
}
//------------------------------------------------------------------------------
int
main()
{
typedef flens::GeMatrix<flens::FullStorage<TYPE_A> > GeMatrixA;
typedef flens::DenseVector<flens::Array<TYPE_X> > DenseVectorX;
typedef flens::DenseVector<flens::Array<TYPE_Y> > DenseVectorY;
TYPE_ALPHA alpha = ALPHA;
TYPE_BETA beta = BETA;
const std::size_t min_m = MIN_M;
const std::size_t min_n = MIN_N;
const std::size_t max_m = MAX_M;
const std::size_t max_n = MAX_N;
const std::size_t inc_m = INC_M;
const std::size_t inc_n = INC_N;
std::printf("#%5s %5s ", "m", "n");
std::printf("%20s %9s", "FLENS/ulmBLAS: t", "MFLOPS");
std::printf("%20s %9s %9s", BLAS_LIB ": t", "MFLOPS", "Residual");
std::printf("\n");
WallTime<double> walltime;
for (std::size_t m=min_m, n=min_n;
m<=max_m && n<=max_n;
m += inc_m, n += inc_n)
{
GeMatrixA A(m, n);
DenseVectorX X(n);
DenseVectorY Y1(m);
DenseVectorY Y2(m);
fill(A);
fill(X);
fill(Y1);
Y2 = Y1;
walltime.tic();
flens::blas::mv(flens::NoTrans, alpha, A, X, beta, Y1);
double t1 = walltime.toc();
walltime.tic();
f77_gemv('N',
A.numRows(), A.numCols(),
alpha,
A.data(), A.leadingDimension(),
X.data(), X.stride(),
beta,
Y2.data(), Y2.stride());
double t2 = walltime.toc();
double res = asumDiff(Y1, Y2)/m;
std::printf(" %5ld %5ld ", m, n);
std::printf("%20.4lf %9.2lf", t1, 2.*m/1000.*n/1000./t1);
std::printf("%20.4lf %9.2lf", t2, 2.*m/1000.*n/1000./t2);
std::printf(" %9.1e", res);
std::printf("\n");
}
}