#include <stdio.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>
#include <math.h>
#ifndef MINDIM_M
#define MINDIM_M 1000
#endif
#ifndef MINDIM_N
#define MINDIM_N 1000
#endif
#ifndef MAXDIM_M
#define MAXDIM_M 5000
#endif
#ifndef MAXDIM_N
#define MAXDIM_N 5000
#endif
#ifndef INC_M
#define INC_M 100
#endif
#ifndef INC_N
#define INC_N 100
#endif
#ifndef MIN_T
#define MIN_T 1
#endif
double A[MAXDIM_M*MAXDIM_K];
double B[MAXDIM_K*MAXDIM_N];
double C[MAXDIM_M*MAXDIM_N];
double C0[MAXDIM_M*MAXDIM_N];
double C1[MAXDIM_M*MAXDIM_N];
double C2[MAXDIM_M*MAXDIM_N];
double
walltime()
{
struct tms ts;
static double ClockTick=0.0;
if (ClockTick==0.0) {
ClockTick = 1.0 / ((double) sysconf(_SC_CLK_TCK));
}
return ((double) times(&ts)) * ClockTick;
}
void
gecopy(long m, long n, const double *A, long incRowA, long incColA,
double *B, long incRowB, long incColB)
{
long i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
B[i*incRowB+j*incColB] = A[i*incRowA+j*incColA];
}
}
}
double
asumDiff(long m, long n, const double *A, long incRowA, long incColA,
const double *B, long incRowB, long incColB)
{
double diff = 0;
long i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
diff += fabs(B[i*incRowB+j*incColB] - A[i*incRowA+j*incColA]);
}
}
return diff;
}
void
initMatrix(long m, long n, double *A, long incRowA, long incColA)
{
long i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
A[i*incRowA+j*incColA] = ((double)rand() - RAND_MAX/2)*200/RAND_MAX;
}
}
}
//------------------------------------------------------------------------------
void
gemm_ilj(long m, long n, long k, double alpha,
const double *A, long incRowA, long incColA,
const double *B, long incRowB, long incColB,
double beta,
const double *C, long incRowC, long incColC)
{
long i, j, l;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
C[i*incRowA+j*incColA] *= beta;
}
}
for (i=0; i<m; ++i) {
for (l=0; l<k; ++l) {
for (j=0; j<n; ++j) {
C[i*incRowA+j*incColA] += alpha*A[i*incRowA+l*incColA]
*B[l*incRowB+j*incColB];
}
}
}
}
void
gemm_jli(long m, long n, long k, double alpha,
const double *A, long incRowA, long incColA,
const double *B, long incRowB, long incColB,
double beta,
const double *C, long incRowC, long incColC)
{
long i, j, l;
for (j=0; j<n; ++j) {
for (i=0; i<m; ++i) {
C[i*incRowA+j*incColA] *= beta;
}
}
for (j=0; j<n; ++j) {
for (l=0; l<k; ++l) {
for (i=0; i<m; ++i) {
C[i*incRowA+j*incColA] += alpha*A[i*incRowA+l*incColA]
*B[l*incRowB+j*incColB];
}
}
}
}
int
main()
{
long m, n;
double t0, t1, t2, diff;
printf("# M N t1 (colMajor) t2 (rowMajor) t2/t1 diff\n");
printf("#================================================================\n");
initMatrix(MAXDIM_M, MAXDIM_K, A, 1, MAXDIM_M);
initMatrix(MAXDIM_K, MAXDIM_N, B, 1, MAXDIM_K);
initMatrix(MAXDIM_M, MAXDIM_N, C, 1, MAXDIM_M);
for (m=MINDIM_M, n=MINDIM_N; m<=MAXDIM_M && n<MAXDIM_N; m+=INC_M, n+=INC_N) {
printf("%4ld %4ld ", m, n);
gecopy(m, n, C, incRowC, incColC, C0, incRowC, incColC);
t1 = 0;
do {
t0 = walltime();
gemm_ilj(m, n, k,
alpha,
A, incRowA, incColA,
B, incRowB, incColB,
beta,
C0, incRowC, inColC);
t1 += walltime() - t0;
} while (t1<MIN_T);
gecopy(m, n, C0, incRowC, incColC, C1, incRowC, incColC);
printf(" %12.2lf", t1);
gecopy(m, n, C, incRowC, incColC, C0, incRowC, incColC);
t2 = 0;
do {
t0 = walltime();
gemm_jli(m, n, k,
alpha,
A, incRowA, incColA,
B, incRowB, incColB,
beta,
C0, incRowC, inColC);
t2 += walltime() - t0;
} while (t2<MIN_T);
gecopy(m, n, C0, incRowC, incColC, C2, incRowC, incColC);
diff = asumDiff(m, n, buffer1, 1, m, buffer2, n, 1);
printf(" %12.2lf %16.2lf", t2, t2/t1);
printf(" %11.2le", diff);
printf("\n");
}
return 0;
}