#include <cstdio>
#include <cassert>
#include <chrono>
#include <complex>
#include <cmath>
#include <limits>
#include <random>
#include <flens/flens.cxx>
#ifndef MAX_N
#define MAX_N 4000
#endif
#ifndef MAX_M
#define MAX_M 4000
#endif
#include "common.h"
#define BLASINT int64_t
//------------------------------------------------------------------------------
extern "C" {
void
sger_(const BLASINT *m, const BLASINT *n,
const float *alpha,
const float *x, const BLASINT *incx,
const float *y, const BLASINT *incy,
float *A, const BLASINT *lda);
void
dger_(const BLASINT *m, const BLASINT *n,
const double *alpha,
const double *x, const BLASINT *incx,
const double *y, const BLASINT *incy,
double *A, const BLASINT *lda);
void
cgeru_(const BLASINT *m, const BLASINT *n,
const float *alpha,
const float *x, const BLASINT *incx,
const float *y, const BLASINT *incy,
float *A, const BLASINT *lda);
void
zgeru_(const BLASINT *m, const BLASINT *n,
const double *alpha,
const double *x, const BLASINT *incx,
const double *y, const BLASINT *incy,
double *A, const BLASINT *lda);
void
cgerc_(const BLASINT *m, const BLASINT *n,
const float *alpha,
const float *x, const BLASINT *incx,
const float *y, const BLASINT *incy,
float *A, const BLASINT *lda);
void
zgerc_(const BLASINT *m, const BLASINT *n,
const double *alpha,
const double *x, const BLASINT *incx,
const double *y, const BLASINT *incy,
double *A, const BLASINT *lda);
} // extern "C"
//------------------------------------------------------------------------------
template <typename Index>
void
f77_ger(Index m, Index n,
float alpha,
const float *X, Index incX,
const float *Y, Index incY,
float *A, Index ldA)
{
BLASINT M = m;
BLASINT N = n;
BLASINT INCX = incX;
BLASINT INCY = incY;
BLASINT LDA = ldA;
sger_(&M, &N,
&alpha,
X, &INCX,
Y, &INCY,
A, &LDA);
}
template <typename Index>
void
f77_ger(Index m, Index n,
double alpha,
const double *X, Index incX,
const double *Y, Index incY,
double *A, Index ldA)
{
BLASINT M = m;
BLASINT N = n;
BLASINT INCX = incX;
BLASINT INCY = incY;
BLASINT LDA = ldA;
dger_(&M, &N,
&alpha,
X, &INCX,
Y, &INCY,
A, &LDA);
}
template <typename Index>
void
f77_ger(Index m, Index n,
const std::complex<float> &alpha,
const std::complex<float> *X, Index incX,
const std::complex<float> *Y, Index incY,
std::complex<float> *A, Index ldA)
{
BLASINT M = m;
BLASINT N = n;
BLASINT INCX = incX;
BLASINT INCY = incY;
BLASINT LDA = ldA;
cgeru_(&M, &N,
(const float *)&alpha,
(const float *)X, &INCX,
(const float *)Y, &INCY,
(float *)A, &LDA);
}
template <typename Index>
void
f77_ger(Index m, Index n,
const std::complex<double> &alpha,
const std::complex<double> *X, Index incX,
const std::complex<double> *Y, Index incY,
std::complex<double> *A, Index ldA)
{
BLASINT M = m;
BLASINT N = n;
BLASINT INCX = incX;
BLASINT INCY = incY;
BLASINT LDA = ldA;
zgeru_(&M, &N,
(const double *)&alpha,
(const double *)X, &INCX,
(const double *)Y, &INCY,
(double *)A, &LDA);
}
//------------------------------------------------------------------------------
int
main()
{
typedef flens::DenseVector<flens::Array<TYPE_X> > DenseVectorX;
typedef flens::DenseVector<flens::Array<TYPE_Y> > DenseVectorY;
typedef flens::GeMatrix<flens::FullStorage<TYPE_A> > GeMatrixA;
TYPE_ALPHA alpha = ALPHA;
const std::size_t min_m = MIN_M;
const std::size_t min_n = MIN_N;
const std::size_t max_m = MAX_M;
const std::size_t max_n = MAX_N;
const std::size_t inc_m = INC_M;
const std::size_t inc_n = INC_N;
std::printf("#%5s %5s ", "m", "n");
std::printf("%20s %9s", "FLENS/ulmBLAS: t", "MFLOPS");
std::printf("%20s %9s %9s", BLAS_LIB ": t", "MFLOPS", "Residual");
std::printf("\n");
WallTime<double> walltime;
for (std::size_t m=min_m, n=min_n;
m<=max_m && n<=max_n;
m += inc_m, n += inc_n)
{
DenseVectorX X(m);
DenseVectorY Y(n);
GeMatrixA A1(m, n);
GeMatrixA A2(m, n);
fill(X);
fill(Y);
fill(A1);
A2 = A1;
walltime.tic();
flens::blas::r(alpha, X, Y, A1);
double t1 = walltime.toc();
walltime.tic();
f77_ger(A2.numRows(), A2.numCols(),
alpha,
X.data(), X.stride(),
Y.data(), Y.stride(),
A2.data(), A2.leadingDimension());
double t2 = walltime.toc();
double res = asumDiff(A1, A2)/(m*n);
std::printf(" %5ld %5ld ", m, n);
std::printf("%20.4lf %9.2lf", t1, 2.*m/1000.*n/1000./t1);
std::printf("%20.4lf %9.2lf", t2, 2.*m/1000.*n/1000./t2);
std::printf(" %9.1e", res);
std::printf("\n");
}
}