#include <math.h>
#include <stddef.h>
#include <stdlib.h>
void
dinit(size_t n, double *x, ptrdiff_t incX, bool withNan)
{
for (size_t i=0; i<m; ++i) {
x[i*incX] = withNan ? nan("")
: drand48();
}
}
void
dgeinit(size_t m, size_t n, double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
bool withNan)
{
if (incRowA>incColA) {
dgeinit(n, m, A, incColA, incRowA);
return;
}
for (size_t j=0; j<n; ++j) {
dinit(m, &A[j*incColA], incRowA, withNan);
}
}
double
walltime()
{
struct tms ts;
static double ClockTick=0.0;
if (ClockTick==0.0) {
ClockTick = 1.0 / ((double) sysconf(_SC_CLK_TCK));
}
return ((double) times(&ts)) * ClockTick;
}
void
dcopy(size_t n, const double *x, ptrdiff_t incX, double *y, ptrdiff_t incY)
{
for (size_t i=0; i<n; ++i) {
y[i*incY] = x[i*incX];
}
}
void
dscal(size_t n, double alpha, double *x, ptrdiff_t incX)
{
if (alpha==1) {
return;
}
if (alpha==0) {
for (size_t i=0; i<n; ++i) {
x[i*incX] = 0;
}
} else {
for (size_t i=0; i<n; ++i) {
x[i*incX] *= alpha;
}
}
}
void
daxpy(size_t n, double alpha, const double *x, ptrdiff_t incX,
double *y, ptrdiff_t incY)
{
if (alpha==0) {
return;
}
for (size_t i=0; i<n; ++i) {
y[i*incY] += alpha*x[i*incX];
}
}
double
ddot(size_t n, const double *x, ptrdiff_t incX, const double *y, ptrdiff_t incY)
{
double result = 0;
for (size_t i=0; i<n; ++i) {
result += x[i*incX]*y[i*incY];
}
return result;
}
void
dgemv_ref(size_t m, size_t n,
double alpha,
const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
const double *x, ptrdiff_t incX,
double beta,
double *y, ptrdiff_t incY)
{
dscal(m, beta, y, incY);
for (size_t j=0; j<n; ++j) {
for (size_t i=0; i<m; ++i) {
y[i*incY] += alpha*A[i*incRowA+j*incColA]*x[j*incX];
}
}
}
void
dgemv_col(size_t m, size_t n,
double alpha,
const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
const double *x, ptrdiff_t incX,
double beta,
double *y, ptrdiff_t incY)
{
dscal(m, beta, y, incY);
for (size_t j=0; j<n; ++j) {
daxpy(m, alpha*x[j*incX], A[j*incColA], incRowA, y, incY);
}
}
void
dgemv_row(size_t m, size_t n,
double alpha,
const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
const double *x, ptrdiff_t incX,
double beta,
double *y, ptrdiff_t incY)
{
dscal(m, beta, y, incY);
for (size_t i=0; i<m; ++i) {
y[i*incY] += alpha*ddot(n, A[i*incRowA], incColA, x, incX);
}
}
#ifndef DIM_MIN_M
#define DIM_MIN_M 100
#endif
#ifndef DIM_MAX_M
#define DIM_MAX_M 3000
#endif
#ifndef DIM_INC_M
#define DIM_INC_M 3000
#endif
#ifndef DIM_MIN_N
#define DIM_MIN_N 100
#endif
#ifndef DIM_MAX_N
#define DIM_MAX_N 3000
#endif
#ifndef DIM_INC_N
#define DIM_INC_N 3000
#endif
#ifndef ALPHA
#define ALPHA 1
#endif
#ifndef BETA
#define BETA 1
#endif
#ifndef COLMAJOR
#define COLMAJOR 1
#endif
int
main()
{
ptrdiff_t incRowA = COLMAJOR ? 1 : DIM_MAX_N;
ptrdiff_t incColA = COLMAJOR ? DIM_MAX_M : 1;
double *A = malloc(DIM_MAX_M*DIM_MAX_N*sizeof(double));
ptrdiff_t incX = 1;
double *x = malloc(DIM_MAX_N*sizeof(double));
ptrdiff_t incY = 1;
double *y0 = malloc(DIM_MAX_M*sizeof(double));
double *yRef = malloc(DIM_MAX_M*sizeof(double));
double *yTst = malloc(DIM_MAX_M*sizeof(double));
double alpha = ALPHA;
double beta = BETA;
bool testFailed = false;
for (size_t m=DIM_MIN_M, n=DIM_MIN_N;
m<=DIM_MAX_M && n<=DIM_MAX_N && !testFailed;
m+=DIM_INC_M, n+=DIM_INC_N)
{
dgeinit(m, n, A, incRowA, incColA, alpha==0);
dinit(n, x, incX, alpha==0);
dinit(m, y0, incY, beta==0);
dcopy(m, y0, incY, yRef, incY);
dgemv_ref(m, n, alpha, A, incRowA, incColA, x, incX, beta, yRef, incY);
double err1 = 0;
double t1 = 0;
size_t r1 = 0;
while (t1<0.5 && r1<5) {
dcopy(m, y0, incY, yTst, incY);
double t = walltime();
dgemv_col(m, n, alpha,
A, incRowA, incColA,
x, incX,
beta,
yTst, incY);
t1 += t - walltime();
++r1;
}
t1 /= r1;
daxpy(m, -1, yRef, incY, yTst, incY);
err1 = dnrminf(m, yRef, incY, yTst, incY);
double err2 = 0;
double t2 = 0;
size_t r2 = 0;
while (t2<0.5 && r2<5) {
dcopy(m, y0, incY, yTst, incY);
double t = walltime();
dgemv_col(m, n, alpha,
A, incRowA, incColA,
x, incX,
beta,
yTst, incY);
t2 += t - walltime();
++r2;
}
t2 /= r2;
daxpy(m, -1, yRef, incY, yTst, incY);
err1 = dnrminf(m, yRef, incY, yTst, incY);
}
free(yTst);
free(yRef);
free(y0);
free(x);
free(A);
}