Content

Ein erster Benchmark

Am Beispiel der AXPY-Operation soll ein erster Benchmark durchgeführt werden. Die Zutaten für einen Benchmark sind

Matrizen und Vektoren mit Zufallszahlen

Füllt die Matrix mit Zufallszahlen. Die Funktion rand() gibt Integer-Werte im Bereich 0,...,RAND_MAX-1 zurück.

void
randGeMatrix(int m, int n, double *A, int incRowA, int incColA)
{
    int i, j;
    for (j=0; j<n; ++j) {
        for (i=0; i<m; ++i) {
            A[i*incRowA+j*incColA] = ((double)rand()-RAND_MAX/2)*200/RAND_MAX;
        }
    }
}

Wall-Time

Liefert die aktuelle Wall-Time:

#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>

double
walltime()
{
   struct tms    ts;
   static double ClockTick=0.0;

   if (ClockTick==0.0) {
        ClockTick = 1.0 / ((double) sysconf(_SC_CLK_TCK));
   }
   return ((double) times(&ts)) * ClockTick;
}

Fehlerschranke

double
err_daxpy(int n, double alpha,
          const double *x, int incX,
          const double *y, int incY,
          const double *z_, int incZ_,
          double *z, int incZ)
{
    double normX = damax(n, x, incX);
    double normY = damax(n, y, incY);
    double normD;

    daxpy(n, -1.0, z_, incZ_, z, incZ);
    normD = damax(n, z, incZ);

    return normD/(fabs(alpha)*n*normX*normY);
}

Das gesamte Benchmark-Programm

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>

double
walltime()
{
   struct tms    ts;
   static double ClockTick=0.0;

   if (ClockTick==0.0) {
        ClockTick = 1.0 / ((double) sysconf(_SC_CLK_TCK));
   }
   return ((double) times(&ts)) * ClockTick;
}

void
initGeMatrix(int m, int n, double *A, int incRowA, int incColA)
{
    int i, j;

    for (i=0; i<m; ++i) {
        for (j=0; j<n; ++j) {
            A[i*incRowA+j*incColA] = i*n + j + 1;
        }
    }
}

void
randGeMatrix(int m, int n, double *A, int incRowA, int incColA)
{
    int i, j;
    for (j=0; j<n; ++j) {
        for (i=0; i<m; ++i) {
            A[i*incRowA+j*incColA] = ((double)rand()-RAND_MAX/2)*200/RAND_MAX;
        }
    }
}

void
printGeMatrix(int m, int n, const double *A, int incRowA, int incColA)
{
    int i, j;

    for (i=0; i<m; ++i) {
        for (j=0; j<n; ++j) {
            printf("%10.4lf ", A[i*incRowA+j*incColA]);
        }
        printf("\n");
    }
    printf("\n\n");
}

//-- BLAS level 1 procedures and functions -------------------------------------

double
ddot(int n, const double *x, int incX, const double *y, int incY)
{
    int     i;
    double  alpha = 0;

    for (i=0; i<n; ++i) {
        alpha += x[i*incX]*y[i*incY];

    }
    return alpha;
}

void
daxpy(int n, double alpha, const double *x, int incX, double *y, int incY)
{
    int i;

    if (alpha==0) {
        return;
    }
    for (i=0; i<n; ++i) {
        y[i*incY] += alpha*x[i*incX];
    }
}

void
dscal(int n, double alpha, double *x, int incX)
{
    int i;

    if (alpha==1) {
        return;
    }
    for (i=0; i<n; ++i) {
        x[i*incX] *= alpha;
    }
}

void
dcopy(int n, const double *x, int incX, double *y, int incY)
{
    int i;

    for (i=0; i<n; ++i) {
        y[i*incY] = x[i*incX];
    }
}

void
dswap(int n, double *x, int incX, double *y, int incY)
{
    int i;

    for (i=0; i<n; ++i) {
        double tmp;

        tmp = x[i*incX];
        x[i*incX] = y[i*incY];
        y[i*incY] = tmp;
    }
}

double
damax(int n, const double *x, int incX)
{
    int     i;
    double  result = 0;

    for (i=0; i<n; ++i) {
        if (fabs(x[i*incX])>result) {
            result = fabs(x[i*incX]);
        }
    }
    return result;
}

double
dgenrm1(int m, int n, const double *A, int incRowA, int incColA)
{
    int     i, j;
    double  result = 0;

    for (j=0; j<n; ++j) {
        double sum = 0;
        for (i=0; i<m; ++i) {
            sum += fabs(A[i*incRowA+j*incColA]);
        }
        if (sum>result) {
            result = sum;
        }
    }
    return result;
}

//-- error bounds for BLAS level 1 ---------------------------------------------

double
err_daxpy(int n, double alpha,
          const double *x, int incX,
          const double *y, int incY,
          const double *z_, int incZ_,
          double *z, int incZ)
{
    double normX = damax(n, x, incX);
    double normY = damax(n, y, incY);
    double normD;

    daxpy(n, -1.0, z_, incZ_, z, incZ);
    normD = damax(n, z, incZ);

    return normD/(fabs(alpha)*n*normX*normY);
}

//-- axpy for testing ----------------------------------------------------------

void
daxpy_test(int n, double alpha, const double *x, int incX, double *y, int incY)
{
    // We try to make it slower ... (so that you see a difference).
    int i;

    if (alpha==0) {
        return;
    }
    for (i=0; i<n; i+=2) {
        y[i*incY] += alpha*x[i*incX];
    }
    for (i=1; i<n; i+=2) {
        y[i*incY] += alpha*x[i*incX];
    }
}

#ifndef N_MIN
#define N_MIN  10000
#endif

#ifndef N_MAX
#define N_MAX  30000
#endif

#ifndef N_INC
#define N_INC  1000
#endif

#ifndef INC_X
#define INC_X  1
#endif

#ifndef INC_Y
#define INC_Y  1
#endif

#ifndef ALPHA
#define ALPHA  1.0
#endif

#ifndef ROWMAJOR
#define ROWMAJOR 0
#endif

#if (ROWMAJOR==1)
#   define INCROW_A  DIM_N
#   define INCCOL_A  1
#else
#   define INCROW_A  1
#   define INCCOL_A  DIM_M
#endif

double x[N_MAX*INC_X];
double y[N_MAX];
double z[N_MAX*INC_Y];
double z_[N_MAX*INC_Y];

int
main()
{
    int n;

    randGeMatrix(N_MAX, 1, x, INC_X, 0);
    randGeMatrix(N_MAX, 1, y, 1, 0);

    for (n=N_MIN; n<=N_MAX; n+=N_INC) {
        int     runs = 1;
        double  t0, t1, dt, err;

        printf("%5d %5d %5d %5.2lf", n, INC_X, INC_Y, ALPHA);

        t0   = 0;
        runs = 0;
        do {
            dcopy(n, y, 1, z_, INC_Y);
            dt = walltime();
            daxpy(n, ALPHA, x, INC_X, z_, INC_Y);
            dt = walltime() - dt;
            t0 += dt;
            ++runs;
        } while (t0<0.3);
        t0 /= runs;

        printf(" %8.2e %8.2lf", t0, n/(1000*1000*t0));

        t1   = 0;
        runs = 0;
        do {
            dcopy(n, y, 1, z, INC_Y);
            dt = walltime();
            daxpy(n, ALPHA, x, INC_X, z, INC_Y);
            dt = walltime() - dt;
            t1 += dt;
            ++runs;
        } while (t1<0.3);
        t1 /= runs;

        printf(" %8.2e %8.2lf", t1, n/(1000*1000*t1));

        err = err_daxpy(n, ALPHA, x, INC_X, y, 1, z_, INC_Y, z, INC_Y);

        printf(" %8.3e", err);
        printf("\n");
    }

    return 0;
}

Benchmark durchführen

$shell> gcc -Wall -O1 -o axpy_bench  axpy_bench.c
$shell> ./axpy_bench > report.axpy
$shell> cat report.axpy
10000     1     1  1.00 9.08e-06  1101.65 7.87e-06  1270.60 0.000e+00
11000     1     1  1.00 1.17e-05   937.52 1.11e-05   987.52 0.000e+00
12000     1     1  1.00 1.29e-05   926.92 1.04e-05  1152.28 0.000e+00
13000     1     1  1.00 1.10e-05  1181.20 1.27e-05  1023.85 0.000e+00
14000     1     1  1.00 1.30e-05  1080.61 1.30e-05  1073.05 0.000e+00
15000     1     1  1.00 1.28e-05  1170.45 1.37e-05  1095.65 0.000e+00
16000     1     1  1.00 1.58e-05  1013.47 1.59e-05  1003.20 0.000e+00
17000     1     1  1.00 1.92e-05   885.43 1.66e-05  1025.98 0.000e+00
18000     1     1  1.00 1.78e-05  1009.86 1.79e-05  1004.28 0.000e+00
19000     1     1  1.00 1.95e-05   975.97 1.89e-05  1007.80 0.000e+00
20000     1     1  1.00 1.82e-05  1100.00 1.84e-05  1084.19 0.000e+00
21000     1     1  1.00 1.51e-05  1394.75 1.79e-05  1172.85 0.000e+00
22000     1     1  1.00 2.19e-05  1005.77 2.07e-05  1065.01 0.000e+00
23000     1     1  1.00 2.19e-05  1051.02 2.23e-05  1030.55 0.000e+00
24000     1     1  1.00 1.05e-05  2275.76 2.09e-05  1150.84 0.000e+00
25000     1     1  1.00 2.52e-05   992.58 2.61e-05   958.95 0.000e+00
26000     1     1  1.00 2.77e-05   938.51 2.29e-05  1136.03 0.000e+00
27000     1     1  1.00 2.62e-05  1031.57 2.45e-05  1100.47 0.000e+00
28000     1     1  1.00 2.89e-05   968.15 2.96e-05   946.49 0.000e+00
29000     1     1  1.00 3.21e-05   903.49 2.88e-05  1007.94 0.000e+00
30000     1     1  1.00 2.91e-05  1030.16 2.90e-05  1034.03 0.000e+00
$shell> 

Plot-Skript

set terminal svg size 900, 500
set output "bench.axpy.svg"
set xlabel "Vector length N"
set ylabel "MFLOPS"
set title "AXPY"
set key outside
set pointsize 0.5
plot "report.axpy" using 1:6 with linespoints lt 2 lw 3 title "AXPY-ref", \
     "report.axpy" using 1:8 with linespoints lt 3 lw 3 title "AXPY-test"

Resulate plotten

$shell> gnuplot plot.axpy
$shell>