#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>
#ifndef COLMAJOR
#define COLMAJOR 0
#else
#undef COLMAJOR
#define COLMAJOR 1
#endif
#ifndef MAXDIM_M
#define MAXDIM_M 4000
#endif
#ifndef MAXDIM_N
#define MAXDIM_N 4000
#endif
#ifndef MAXDIM_K
#define MAXDIM_K 4000
#endif
#ifndef MIN_M
#define MIN_M 100
#endif
#ifndef MIN_N
#define MIN_N 100
#endif
#ifndef MIN_K
#define MIN_K 100
#endif
#ifndef MAX_M
#define MAX_M 4000
#endif
#ifndef MAX_N
#define MAX_N 4000
#endif
#ifndef MAX_K
#define MAX_K 4000
#endif
#ifndef INC_M
#define INC_M 100
#endif
#ifndef INC_N
#define INC_N 100
#endif
#ifndef INC_K
#define INC_K 100
#endif
#ifndef ALPHA
#define ALPHA 1.0
#endif
#ifndef BETA
#define BETA 0.0
#endif
double A[MAXDIM_M*MAXDIM_K];
double B[MAXDIM_K*MAXDIM_N];
double C1[MAXDIM_M*MAXDIM_N];
double C2[MAXDIM_M*MAXDIM_N];
double C3[MAXDIM_M*MAXDIM_N];
double C4[MAXDIM_M*MAXDIM_N];
double
walltime()
{
struct tms ts;
static double ClockTick=0.0;
if (ClockTick==0.0) {
ClockTick = 1.0 / ((double) sysconf(_SC_CLK_TCK));
}
return ((double) times(&ts)) * ClockTick;
}
void
initMatrix(long m, long n, double *A, long incRowA, long incColA)
{
int i, j;
for (j=0; j<n; ++j) {
for (i=0; i<m; ++i) {
A[i*incRowA+j*incColA] = ((double)rand() - RAND_MAX/2)*200/RAND_MAX;
}
}
}
void
dgecopy(long m, long n,
const double *A, long incRowA, long incColA,
double *B, long incRowB, long incColB)
{
int i, j;
for (j=0; j<n; ++j) {
for (i=0; i<m; ++i) {
B[i*incRowB+j*incColB] = A[i*incRowA+j*incColA];
}
}
}
void
dgeaxpy(long m, long n,
double alpha,
const double *X, long incRowX, long incColX,
double *Y, long incRowY, long incColY)
{
long i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
Y[i*incRowY+j*incColY] += alpha*X[i*incRowX+j*incColX];
}
}
}
double
asumDiffMatrix(long m, long n,
const double *A, long incRowA, long incColA,
double *B, long incRowB, long incColB)
{
int i, j;
double asum = 0;
for (j=0; j<n; ++j) {
for (i=0; i<m; ++i) {
asum += fabs(B[i*incRowB+j*incColB] - A[i*incRowA+j*incColA]);
}
}
return asum;
}
//------------------------------------------------------------------------------
// 1. GEMM Variante: Schulmethode
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// 2. GEMM Variante: Zeilenweise
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// 3. GEMM Variante: Spaltenweise
//------------------------------------------------------------------------------
//------------------------------------------------------------------------------
// 4. GEMM Variante: Bisschen mit Puffer
//------------------------------------------------------------------------------
int
main()
{
initMatrix(MAXDIM_M, MAXDIM_K, A, 1, MAXDIM_M);
initMatrix(MAXDIM_K, MAXDIM_N, B, 1, MAXDIM_K);
initMatrix(MAXDIM_M, MAXDIM_N, C1, 1, MAXDIM_M);
dgecopy(MAXDIM_M, MAXDIM_N, C1, 1, MAXDIM_M, C2, 1, MAXDIM_M);
dgecopy(MAXDIM_M, MAXDIM_N, C1, 1, MAXDIM_M, C3, 1, MAXDIM_M);
dgecopy(MAXDIM_M, MAXDIM_N, C1, 1, MAXDIM_M, C4, 1, MAXDIM_M);
long m, n, k;
// Header-Zeile für die Ausgabe
for (m = MIN_M, n = MIN_N, k = MIN_K;
m <=MAX_M && n <= MAX_N && k <= MAX_K;
m += INC_M, n += INC_N, k += INC_K)
{
double t0, t1, t2, t3, t4;
long incRowA = (COLMAJOR) ? 1 : k;
long incColA = (COLMAJOR) ? m : 1;
long incRowB = (COLMAJOR) ? 1 : n;
long incColB = (COLMAJOR) ? k : 1;
long incRowC = (COLMAJOR) ? 1 : n;
long incColC = (COLMAJOR) ? m : 1;
t0 = walltime();
// GEMM Variante 1 hier aufrufen
t1 = walltime() - t0;
// Weitere Varianten aufrufen ....
// Audgabe: Dimensionen, Zeit, MFLOPS, ...
}
}