#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>
double
walltime()
{
struct tms ts;
static double ClockTick=0.0;
if (ClockTick==0.0) {
ClockTick = 1.0 / ((double) sysconf(_SC_CLK_TCK));
}
return ((double) times(&ts)) * ClockTick;
}
void
initGeMatrix(int m, int n, double *A, int incRowA, int incColA)
{
int i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
A[i*incRowA+j*incColA] = i*n + j + 1;
}
}
}
void
randGeMatrix(int m, int n, double *A, int incRowA, int incColA)
{
int i, j;
for (j=0; j<n; ++j) {
for (i=0; i<m; ++i) {
A[i*incRowA+j*incColA] = ((double)rand()-RAND_MAX/2)*200/RAND_MAX;
}
}
}
void
printGeMatrix(int m, int n, const double *A, int incRowA, int incColA)
{
int i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
printf("%10.4lf ", A[i*incRowA+j*incColA]);
}
printf("\n");
}
printf("\n\n");
}
//-- BLAS level 1 procedures and functions -------------------------------------
double
ddot(int n, const double *x, int incX, const double *y, int incY)
{
int i;
double alpha = 0;
for (i=0; i<n; ++i) {
alpha += x[i*incX]*y[i*incY];
}
return alpha;
}
void
daxpy(int n, double alpha, const double *x, int incX, double *y, int incY)
{
int i;
if (alpha==0) {
return;
}
for (i=0; i<n; ++i) {
y[i*incY] += alpha*x[i*incX];
}
}
void
dscal(int n, double alpha, double *x, int incX)
{
int i;
if (alpha==1) {
return;
}
for (i=0; i<n; ++i) {
x[i*incX] *= alpha;
}
}
void
dcopy(int n, const double *x, int incX, double *y, int incY)
{
int i;
for (i=0; i<n; ++i) {
y[i*incY] = x[i*incX];
}
}
void
dswap(int n, double *x, int incX, double *y, int incY)
{
int i;
for (i=0; i<n; ++i) {
double tmp;
tmp = x[i*incX];
x[i*incX] = y[i*incY];
y[i*incY] = tmp;
}
}
double
damax(int n, const double *x, int incX)
{
int i;
double result = 0;
for (i=0; i<n; ++i) {
if (fabs(x[i*incX])>result) {
result = fabs(x[i*incX]);
}
}
return result;
}
double
dgenrm1(int m, int n, const double *A, int incRowA, int incColA)
{
int i, j;
double result = 0;
for (j=0; j<n; ++j) {
double sum = 0;
for (i=0; i<m; ++i) {
sum += fabs(A[i*incRowA+j*incColA]);
}
if (sum>result) {
result = sum;
}
}
return result;
}
//-- error bounds for BLAS level 1 ---------------------------------------------
double
err_daxpy(int n, double alpha,
const double *x, int incX,
const double *y, int incY,
const double *z_, int incZ_,
double *z, int incZ)
{
double normX = damax(n, x, incX);
double normY = damax(n, y, incY);
double normD;
daxpy(n, -1.0, z_, incZ_, z, incZ);
normD = damax(n, z, incZ);
return normD/(fabs(alpha)*n*normX*normY);
}
//-- axpy for testing ----------------------------------------------------------
void
daxpy_test(int n, double alpha, const double *x, int incX, double *y, int incY)
{
// We try to make it slower ... (so that you see a difference).
int i;
if (alpha==0) {
return;
}
for (i=0; i<n; i+=2) {
y[i*incY] += alpha*x[i*incX];
}
for (i=1; i<n; i+=2) {
y[i*incY] += alpha*x[i*incX];
}
}
#ifndef N_MIN
#define N_MIN 10000
#endif
#ifndef N_MAX
#define N_MAX 30000
#endif
#ifndef N_INC
#define N_INC 1000
#endif
#ifndef INC_X
#define INC_X 1
#endif
#ifndef INC_Y
#define INC_Y 1
#endif
#ifndef ALPHA
#define ALPHA 1.0
#endif
#ifndef ROWMAJOR
#define ROWMAJOR 0
#endif
#if (ROWMAJOR==1)
# define INCROW_A DIM_N
# define INCCOL_A 1
#else
# define INCROW_A 1
# define INCCOL_A DIM_M
#endif
double x[N_MAX*INC_X];
double y[N_MAX];
double z[N_MAX*INC_Y];
double z_[N_MAX*INC_Y];
int
main()
{
int n;
randGeMatrix(N_MAX, 1, x, INC_X, 0);
randGeMatrix(N_MAX, 1, y, 1, 0);
for (n=N_MIN; n<=N_MAX; n+=N_INC) {
int runs = 1;
double t0, t1, dt, err;
printf("%5d %5d %5d %5.2lf", n, INC_X, INC_Y, ALPHA);
t0 = 0;
runs = 0;
do {
dcopy(n, y, 1, z_, INC_Y);
dt = walltime();
daxpy(n, ALPHA, x, INC_X, z_, INC_Y);
dt = walltime() - dt;
t0 += dt;
++runs;
} while (t0<0.3);
t0 /= runs;
printf(" %8.2e %8.2lf", t0, n/(1000*1000*t0));
t1 = 0;
runs = 0;
do {
dcopy(n, y, 1, z, INC_Y);
dt = walltime();
daxpy(n, ALPHA, x, INC_X, z, INC_Y);
dt = walltime() - dt;
t1 += dt;
++runs;
} while (t1<0.3);
t1 /= runs;
printf(" %8.2e %8.2lf", t1, n/(1000*1000*t1));
err = err_daxpy(n, ALPHA, x, INC_X, y, 1, z_, INC_Y, z, INC_Y);
printf(" %8.3e", err);
printf("\n");
}
return 0;
}