#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>
//-- timer for benchmarks ------------------------------------------------------
double
walltime()
{
struct tms ts;
static double ClockTick=0.0;
if (ClockTick==0.0) {
ClockTick = 1.0 / ((double) sysconf(_SC_CLK_TCK));
}
return ((double) times(&ts)) * ClockTick;
}
//-- setup and print matrices --------------------------------------------------
void
initGeMatrix(int m, int n, double *A, int incRowA, int incColA)
{
int i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
A[i*incRowA+j*incColA] = i*n + j + 1;
}
}
}
void
randGeMatrix(int m, int n, double *A, int incRowA, int incColA)
{
int i, j;
for (j=0; j<n; ++j) {
for (i=0; i<m; ++i) {
A[i*incRowA+j*incColA] = ((double)rand()-RAND_MAX/2)*200/RAND_MAX;
}
}
}
void
printGeMatrix(int m, int n, const double *A, int incRowA, int incColA)
{
int i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
printf("%10.4lf ", A[i*incRowA+j*incColA]);
}
printf("\n");
}
printf("\n\n");
}
//-- some BLAS Level 1 procedures and functions --------------------------------
double
ddot(int n, const double *x, int incX, const double *y, int incY)
{
int i;
double alpha = 0;
for (i=0; i<n; ++i) {
alpha += x[i*incX]*y[i*incY];
}
return alpha;
}
void
daxpy(int n, double alpha, const double *x, int incX, double *y, int incY)
{
int i;
if (alpha==0) {
return;
}
for (i=0; i<n; ++i) {
y[i*incY] += alpha*x[i*incX];
}
}
void
dscal(int n, double alpha, double *x, int incX)
{
int i;
if (alpha==1) {
return;
}
for (i=0; i<n; ++i) {
x[i*incX] *= alpha;
}
}
void
dcopy(int n, const double *x, int incX, double *y, int incY)
{
int i;
for (i=0; i<n; ++i) {
y[i*incY] = x[i*incX];
}
}
void
dswap(int n, double *x, int incX, double *y, int incY)
{
int i;
for (i=0; i<n; ++i) {
double tmp;
tmp = x[i*incX];
x[i*incX] = y[i*incY];
y[i*incY] = tmp;
}
}
double
damax(int n, const double *x, int incX)
{
int i;
double result = 0;
for (i=0; i<n; ++i) {
if (fabs(x[i*incX])>result) {
result = fabs(x[i*incX]);
}
}
return result;
}
double
dgenrm1(int m, int n, const double *A, int incRowA, int incColA)
{
int i, j;
double result = 0;
for (j=0; j<n; ++j) {
double sum = 0;
for (i=0; i<m; ++i) {
sum += fabs(A[i*incRowA+j*incColA]);
}
if (sum>result) {
result = sum;
}
}
return result;
}
//-- error bound for gemv ------------------------------------------------------
//
// Vector y0 is the trusted result and vector y1 the vector to be tested.
//
double
err_dgemv(int m, int n, double alpha,
const double *A, int incRowA, int incColA,
const double *x, int incX,
double beta,
const double *y0, int incY0,
double *y1, int incY1)
{
int max_mn = (m>n) ? m : n;
double normA = fabs(alpha)*dgenrm1(m, n, A, incRowA, incColA);
double normX = damax(n, x, incX);
double normY0 = fabs(beta)*damax(m, y0, incY0);
double normD;
daxpy(m, -1.0, y0, incY0, y1, incY1);
normD = damax(n, y1, incY1);
return normD/(normA*normX*normY0*max_mn);
}
//-- Fused BLAS Level 1 procedures and functions -------------------------------
#ifndef DGEMV_MxBS
#define DGEMV_MxBS 4
#endif
void
dgemv_mxBS(int m, double alpha,
const double *A, int incRowA, int incColA,
const double *x, int incX,
double *y, int incY)
{
// your code here
}
#ifndef DGEMV_BSxN
#define DGEMV_BSxN 4
#endif
void
dgemv_BSxn(int n, double alpha,
const double *A, int incRowA, int incColA,
const double *x, int incX,
double beta,
double *y, int incY)
{
// your code here
}
//-- GEMV implementations ------------------------------------------------------
void
dgemv_ref(int m, int n, double alpha,
const double *A, int incRowA, int incColA,
const double *x, int incX,
double beta,
double *y, int incY)
{
int i, j;
for (i=0; i<m; ++i) {
if (beta==0) {
y[i*incY] = 0;
} else {
y[i*incY] *= beta;
}
for (j=0; j<n; ++j) {
y[i*incY] += alpha*A[i*incRowA+j*incColA]*x[j*incX];
}
}
}
void
dgemv_col(int m, int n, double alpha,
const double *A, int incRowA, int incColA,
const double *x, int incX,
double beta,
double *y, int incY)
{
int j;
dscal(m, beta, y, incY);
for (j=0; j<n; ++j) {
daxpy(m, alpha*x[j*incX], &A[j*incColA], incRowA, y, incY);
}
}
void
dgemv_row(int m, int n, double alpha,
const double *A, int incRowA, int incColA,
const double *x, int incX,
double beta,
double *y, int incY)
{
int i;
for (i=0; i<m; ++i) {
y[i*incY] *= beta;
y[i*incY] += alpha*ddot(n, &A[i*incRowA], incColA, x, incX);
}
}
void
dgemv_blk_col(int m, int n, double alpha,
const double *A, int incRowA, int incColA,
const double *x, int incX,
double beta,
double *y, int incY)
{
// your code here
}
void
dgemv_blk_row(int m, int n, double alpha,
const double *A, int incRowA, int incColA,
const double *x, int incX,
double beta,
double *y, int incY)
{
// your code here
}
//------------------------------------------------------------------------------
#ifndef MIN_M
#define MIN_M 100
#endif
#ifndef MIN_N
#define MIN_N 100
#endif
#ifndef MAX_M
#define MAX_M 8000
#endif
#ifndef MAX_N
#define MAX_N 8000
#endif
#ifndef INC_M
#define INC_M 100
#endif
#ifndef INC_N
#define INC_N 100
#endif
#ifndef ROWMAJOR
#define ROWMAJOR 0
#endif
#if (ROWMAJOR==1)
# define INCROW_A MAX_N
# define INCCOL_A 1
#else
# define INCROW_A 1
# define INCCOL_A MAX_M
#endif
#ifndef INC_X
#define INC_X 1
#endif
#ifndef INC_Y
#define INC_Y 1
#endif
#ifndef ALPHA
#define ALPHA 2
#endif
#ifndef BETA
#define BETA 3
#endif
//
// A, x, y_ will get initialized with initGeMatrix
//
double A[MAX_M*MAX_N];
double x[INC_X*MAX_N];
double y_[MAX_M];
//
// Each implementation gets its own vector for y:
//
double y_0[INC_Y*MAX_M];
double y_1[INC_Y*MAX_M];
double y_2[INC_Y*MAX_M];
int
main()
{
int m, n;
randGeMatrix(MAX_M, MAX_N, A, INCROW_A, INCCOL_A);
randGeMatrix(MAX_N, 1, x, INC_X, 0);
randGeMatrix(MAX_M, 1, y_, 1, 0);
printf("#%9s %9s %9s %9s", "m", "n", "INCROW_A", "INCCOL_A");
printf(" %9s %9s", "INC_X", "INC_Y");
printf(" %9s %9s", "ALPHA", "BETA");
printf(" %12s %12s", "t", "MFLOPS");
printf(" %12s %12s %12s", "t", "MFLOPS", "err");
printf(" %12s %12s %12s", "t", "MFLOPS", "err");
printf(" %12s %12s %12s", "t", "MFLOPS", "err");
printf(" %12s %12s %12s", "t", "MFLOPS", "err");
printf("\n");
for (m=MIN_M, n=MIN_N; (m<=MAX_M) && (n<=MAX_N); m+=INC_M, n+=INC_N) {
int runs = 1;
int ops = m*n + m*(n+1);
double t0, t1, dt, err;
printf(" %9d %9d %9d %9d", m, n, INCROW_A, INCCOL_A);
printf(" %9d %9d", INC_X, INC_Y);
printf(" %9.2lf %9.2lf", (double)ALPHA, (double)BETA);
t0 = 0;
runs = 0;
do {
dcopy(n, y_, 1, y_0, INC_Y);
dt = walltime();
dgemv_ref(m, n, ALPHA,
A, INCROW_A, INCCOL_A,
x, INC_X,
BETA,
y_0, INC_Y);
dt = walltime() - dt;
t0 += dt;
++runs;
} while (t0<0.3);
t0 /= runs;
printf(" %12.2e %12.2lf", t0, ops/(1000*1000*t0));
t1 = 0;
runs = 0;
do {
dcopy(n, y_, 1, y_1, INC_Y);
dt = walltime();
dgemv_col(m, n, ALPHA,
A, INCROW_A, INCCOL_A,
x, INC_X,
BETA,
y_1, INC_Y);
dt = walltime() - dt;
t1 += dt;
++runs;
} while (t1<0.3);
t1 /= runs;
err = err_dgemv(m, n, ALPHA,
A, INCROW_A, INCCOL_A,
x, INC_X,
BETA,
y_0, INC_Y,
y_1, INC_Y);
printf(" %12.2e %12.2lf %12.2e", t1, ops/(1000*1000*t1), err);
t1 = 0;
runs = 0;
do {
dcopy(n, y_, 1, y_2, INC_Y);
dt = walltime();
dgemv_row(m, n, ALPHA,
A, INCROW_A, INCCOL_A,
x, INC_X,
BETA,
y_2, INC_Y);
dt = walltime() - dt;
t1 += dt;
++runs;
} while (t1<0.3);
t1 /= runs;
err = err_dgemv(m, n, ALPHA,
A, INCROW_A, INCCOL_A,
x, INC_X,
BETA,
y_0, INC_Y,
y_2, INC_Y);
printf(" %12.2e %12.2lf %12.2e", t1, ops/(1000*1000*t1), err);
t1 = 0;
runs = 0;
do {
dcopy(n, y_, 1, y_2, INC_Y);
dt = walltime();
dgemv_blk_col(m, n, ALPHA,
A, INCROW_A, INCCOL_A,
x, INC_X,
BETA,
y_2, INC_Y);
dt = walltime() - dt;
t1 += dt;
++runs;
} while (t1<0.3);
t1 /= runs;
err = err_dgemv(m, n, ALPHA,
A, INCROW_A, INCCOL_A,
x, INC_X,
BETA,
y_0, INC_Y,
y_2, INC_Y);
printf(" %12.2e %12.2lf %12.2e", t1, ops/(1000*1000*t1), err);
t1 = 0;
runs = 0;
do {
dcopy(n, y_, 1, y_2, INC_Y);
dt = walltime();
dgemv_blk_row(m, n, ALPHA,
A, INCROW_A, INCCOL_A,
x, INC_X,
BETA,
y_2, INC_Y);
dt = walltime() - dt;
t1 += dt;
++runs;
} while (t1<0.3);
t1 /= runs;
err = err_dgemv(m, n, ALPHA,
A, INCROW_A, INCCOL_A,
x, INC_X,
BETA,
y_0, INC_Y,
y_2, INC_Y);
printf(" %12.2e %12.2lf %12.2e", t1, ops/(1000*1000*t1), err);
printf("\n");
}
return 0;
}