#include <ulmaux.h>
#include <ulmblas.h>
#include <assert.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include <math.h>
#include <float.h>
#include <string.h>
void
dgemv_ref(size_t m, size_t n,
double alpha,
const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
const double *x, ptrdiff_t incX,
double beta,
double *y, ptrdiff_t incY)
{
if (m==0 || n==0) {
return;
}
if (beta!=1) {
if (beta!=0) {
for (size_t i=0; i<m; ++i) {
y[i*incY] *= beta;
}
} else {
for (size_t i=0; i<m; ++i) {
y[i*incY] = 0;
}
}
}
if (alpha!=0) {
for (size_t j=0; j<n; ++j) {
for (size_t i=0; i<m; ++i) {
y[i*incY] += alpha*A[i*incRowA+j*incColA]*x[j*incX];
}
}
}
}
double
dgemv_err(size_t m, size_t n,
double alpha,
const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
const double *x, ptrdiff_t incX,
const double *y0, ptrdiff_t incY0,
double beta,
const double *yRef, ptrdiff_t incYRef,
double *ySol, ptrdiff_t incYSol)
{
if (m==0 || n==0) {
return 0;
}
double nrmY0 = beta==0 ? 0 : dgenrm_inf(m, 1, y0, incY0, 1);
double nrmX = alpha==0 ? 0 : dgenrm_inf(n, 1, x, incX, 1);
double nrmA = alpha==0 ? 0 : dgenrm_inf(m, n, A, incRowA, incColA);
size_t maxMN = m<n ? n : m;
daxpy(m, -1, yRef, incYRef, ySol, incYSol);
double nrmDiff = dgenrm_inf(m, 1, ySol, incYSol, 1);
if (nrmDiff==0) {
return 0;
}
return nrmDiff /
(DBL_EPSILON*(maxMN*fabs(alpha)*nrmA*nrmX + m*fabs(beta)*nrmY0));
}
bool
get_next_param(bool reset, size_t *m, size_t *n, double *alpha,
ptrdiff_t *incRowA, ptrdiff_t *incColA, double *beta,
ptrdiff_t *incX, ptrdiff_t *incY)
{
static size_t dim[] = {0, 1, 32, 31};
static ptrdiff_t inc[] = {1, 2, -1, -2};
static double scalar[] = {0, 1, 2, -1, -2, 1.5, 2.5, -1.5};
static bool colMajor;
static size_t idx_m;
static size_t idx_n;
static size_t idx_alpha;
static size_t idx_incRowA;
static size_t idx_incColA;
static size_t idx_beta;
static size_t idx_incX;
static size_t idx_incY;
if (reset) {
colMajor = false;
idx_m = 0;
idx_n = 0;
idx_alpha = 0;
idx_incRowA = 0;
idx_incColA = 0;
idx_beta = 0;
idx_incX = 0;
idx_incY = 0;
}
*m = dim[idx_m];
*n = dim[idx_n];
*alpha = scalar[idx_alpha];
*beta = scalar[idx_beta];
*incX = inc[idx_incX];
*incY = inc[idx_incY];
if (colMajor) {
*incRowA = inc[idx_incRowA];
*incColA = dim[idx_m]*ptrdiff_abs(inc[idx_incRowA]*inc[idx_incColA]);
} else {
*incRowA = dim[idx_n]*ptrdiff_abs(inc[idx_incRowA]*inc[idx_incColA]);
*incColA = inc[idx_incColA];
}
if (!colMajor) {
colMajor = true;
return true;
}
colMajor = false;
if (++idx_incRowA < sizeof(inc)/sizeof(ptrdiff_t)) {
return true;
}
idx_incRowA = 0;
if (++idx_incColA < sizeof(inc)/sizeof(ptrdiff_t)) {
return true;
}
idx_incColA = 0;
if (++idx_m < sizeof(dim)/sizeof(size_t)) {
return true;
}
idx_m = 0;
if (++idx_n < sizeof(dim)/sizeof(size_t)) {
return true;
}
idx_n = 0;
if (++idx_alpha < sizeof(scalar)/sizeof(double)) {
return true;
}
idx_alpha = 0;
if (++idx_beta < sizeof(scalar)/sizeof(double)) {
return true;
}
idx_beta = 0;
if (++idx_incX < sizeof(inc)/sizeof(ptrdiff_t)) {
return true;
}
idx_incX = 0;
if (++idx_incY < sizeof(inc)/sizeof(ptrdiff_t)) {
return true;
}
idx_incY = 0;
return false;
}
#ifndef SEED_RAND
#define SEED_RAND 0
#endif
#ifndef TOL_ERR
#define TOL_ERR 2
#endif
void
check_dgemv()
{
size_t m, n;
ptrdiff_t incRowA, incColA, incX, incY;
double alpha, beta;
bool more, reset = true;
srand(SEED_RAND);
printf("%8s %8s %8s %8s %8s %8s %8s %8s %8s %8s\n",
"m", "n", "alpha", "incRowA", "incColA", "beta", "incX", "incY",
"err", "res");
do {
more = get_next_param(reset, &m, &n, &alpha, &incRowA, &incColA,
&beta, &incX, &incY);
reset = false;
size_t bufsize_A = 1 + m*ptrdiff_abs(incRowA) * n*ptrdiff_abs(incColA);
size_t bufsize_x = 1 + n*ptrdiff_abs(incX);
size_t bufsize_y = 1 + m*ptrdiff_abs(incY);
double *buf_A = malloc(bufsize_A*sizeof(double));
double *buf_x = malloc(bufsize_x*sizeof(double));
double *buf_y0 = malloc(bufsize_y*sizeof(double));
double *buf_yRef = malloc(bufsize_y*sizeof(double));
double *buf_ySol = malloc(bufsize_y*sizeof(double));
if (!buf_A || !buf_x || !buf_y0 || !buf_yRef || !buf_ySol) {
abort();
}
double *A = buf_A;
double *x = buf_x;
double *y0 = buf_y0;
double *yRef = buf_yRef;
double *ySol = buf_ySol;
if (incRowA<0) {
A -= m*incRowA;
}
if (incColA<0) {
A -= n*incColA;
}
if (incX<0) {
x -= n*incX;
}
if (incY<0) {
y0 -= m*incY;
yRef -= m*incY;
ySol -= m*incY;
}
randDGeMatrix(m, n, alpha==0, A, incRowA, incColA);
randDGeMatrix(n, 1, alpha==0, x, incX, 0);
randDGeMatrix(m, 1, beta==0, y0, incY, 0);
dcopy(m, y0, incY, yRef, incY);
dgemv_ref(m, n, alpha, A, incRowA, incColA, x, incX, beta, yRef, incY);
dcopy(m, y0, incY, ySol, incY);
dgemv(m, n, alpha, A, incRowA, incColA, x, incX, beta, ySol, incY);
double err = dgemv_err(m, n, alpha,
A, incRowA, incColA,
x, incX,
y0, incY,
beta,
yRef, incY,
ySol, incY);
bool pass = err<TOL_ERR;
printf("%8zu %8zu %8.2lf %8td %8td %8.2lf %8td %8td %7.1e %8s\n",
m, n, alpha, incRowA, incColA, beta, incX, incY,
err, pass ? "PASS" : "FAILED");
if (!pass) {
printf("alpha = %8.2lf, beta = %8.2lf\n", alpha, beta);
printf("y0=\n");
printDGeMatrix(1, m, y0, 0, incY);
printf("A=\n");
printDGeMatrix(m, n, A, incRowA, incColA);
printf("x=\n");
printDGeMatrix(1, n, x, 0, incX);
printf("yRef=\n");
printDGeMatrix(1, m, yRef, 0, incY);
printf("ySol=\n");
printDGeMatrix(1, m, ySol, 0, incY);
break;
}
free(buf_A);
free(buf_x);
free(buf_y0);
free(buf_yRef);
free(buf_ySol);
} while (more);
}
#ifndef COLMAJOR
#define COLMAJOR 1
#endif
#ifndef MAX_M
#define MAX_M 1500
#endif
#ifndef MAX_N
#define MAX_N 1500
#endif
#ifndef ALPHA
#define ALPHA 1
#endif
#ifndef BETA
#define BETA 1
#endif
void
bench_dgemv(int argc, char **argv)
{
bool colmajor = COLMAJOR;
double alpha = ALPHA;
double beta = BETA;
ptrdiff_t incX = 1;
ptrdiff_t incY = 1;
for (int i=0; i<argc; ++i) {
if (!strcmp(argv[i], "colmajor")) {
colmajor = true;
}
if (!strcmp(argv[i], "rowmajor")) {
colmajor = false;
}
if (!strcmp(argv[i], "alpha")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%lf", &alpha)==1);
}
if (!strcmp(argv[i], "beta")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%lf", &beta)==1);
}
if (!strcmp(argv[i], "incx")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &incX)==1);
assert(incX>0);
}
if (!strcmp(argv[i], "incy")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &incY)==1);
assert(incY>0);
}
}
srand(SEED_RAND);
printf("#COLMAJOR = %d\n", colmajor);
printf("#ALPHA = %lf\n", alpha);
printf("#BETA = %lf\n", beta);
printf("#INCX = %td\n", incX);
printf("#INCY = %td\n", incY);
size_t bufsize_x = MAX_N*ptrdiff_abs(incX);
size_t bufsize_y = MAX_M*ptrdiff_abs(incY);
double *A = malloc(MAX_M*MAX_N*sizeof(double));
double *x = malloc(bufsize_x*sizeof(double));
double *y0 = malloc(bufsize_y*sizeof(double));
double *yRef = malloc(bufsize_y*sizeof(double));
double *ySol = malloc(bufsize_y*sizeof(double));
if (!A || !x || !y0 || !yRef || !ySol) {
abort();
}
printf("#%4s %4s ", "m", "n");
printf("%10s %10s ", "time ref", "mflops ref");
printf("%10s %10s %7s ", "time 1", "mflops 1", "err");
printf("\n");
for (size_t m=100, n=100; m<=MAX_M && n<=MAX_N; m+=100, n+=100) {
ptrdiff_t incRowA = colmajor ? 1 : n;
ptrdiff_t incColA = colmajor ? m : 1;
double mflop = m*(2*n+1)/1000./1000.;
randDGeMatrix(m, n, alpha==0, A, incRowA, incColA);
randDGeMatrix(n, 1, alpha==0, x, incX, 0);
randDGeMatrix(m, 1, beta==0, y0, incY, 0);
printf(" %4zu %4zu ", m, n);
{
double t = 0;
size_t runs = 0;
while (t<0.1 || runs<3) {
dcopy(m, y0, incY, yRef, incY);
double t0 = walltime();
dgemv_ref(m, n,
alpha,
A, incRowA, incColA,
x, incX,
beta,
yRef, incY);
t += walltime() - t0;
++runs;
}
t /= runs;
printf("%10.2lf %10.2lf ", t, mflop/t);
}
{
double t = 0;
size_t runs = 0;
while (t<0.1 || runs<3) {
dcopy(m, y0, incY, ySol, incY);
double t0 = walltime();
dgemv(m, n,
alpha,
A, incRowA, incColA,
x, incX,
beta,
ySol, incY);
t += walltime() - t0;
++runs;
}
t /= runs;
double err = dgemv_err(m, n, alpha,
A, incRowA, incColA,
x, incX,
y0, incY,
beta,
yRef, incY,
ySol, incY);
printf("%10.2lf %10.2lf %7.1e ", t, mflop/t, err);
}
printf("\n");
fflush(stdout);
}
free(A);
free(x);
free(y0);
free(yRef);
free(ySol);
}
int
main(int argc, char **argv)
{
if (argc<2 || (strcmp(argv[1], "check") && strcmp(argv[1], "bench"))) {
fprintf(stderr, "usage:\n");
fprintf(stderr, " %s check\n", argv[0]);
fprintf(stderr, " %s bench [colmajor | rowmajor]"
" [alpha <value>]"
" [beta <value>]"
"\n", argv[0]);
return 1;
}
if (!strcmp(argv[1], "check")) {
check_dgemv();
}
if (!strcmp(argv[1], "bench")) {
bench_dgemv(argc-2, argv+2);
}
}