Content

Lösungsvorschlag

Test nach Augenmaß

#include <stdio.h>
#include <math.h>

//-- setup and print matrices --------------------------------------------------

void
initGeMatrix(int m, int n, double *A, int incRowA, int incColA)
{
    int i, j;

    for (i=0; i<m; ++i) {
        for (j=0; j<n; ++j) {
            A[i*incRowA+j*incColA] = i*n + j + 1;
        }
    }
}

void
printGeMatrix(int m, int n, const double *A, int incRowA, int incColA)
{
    int i, j;

    for (i=0; i<m; ++i) {
        for (j=0; j<n; ++j) {
            printf("%10.4lf ", A[i*incRowA+j*incColA]);
        }
        printf("\n");
    }
    printf("\n\n");
}

//-- some BLAS Level 1 procedures and functions --------------------------------

double
ddot(int n, const double *x, int incX, const double *y, int incY)
{
    int     i;
    double  alpha = 0;

    for (i=0; i<n; ++i) {
        alpha += x[i*incX]*y[i*incY];

    }
    return alpha;
}

void
daxpy(int n, double alpha, const double *x, int incX, double *y, int incY)
{
    int i;

    if (alpha==0) {
        return;
    }
    for (i=0; i<n; ++i) {
        y[i*incY] += alpha*x[i*incX];
    }
}

void
dscal(int n, double alpha, double *x, int incX)
{
    int i;

    if (alpha==1) {
        return;
    }
    for (i=0; i<n; ++i) {
        x[i*incX] *= alpha;
    }
}

void
dcopy(int n, const double *x, int incX, double *y, int incY)
{
    int i;

    for (i=0; i<n; ++i) {
        y[i*incY] = x[i*incX];
    }
}

void
dswap(int n, double *x, int incX, double *y, int incY)
{
    int i;

    for (i=0; i<n; ++i) {
        double tmp;

        tmp = x[i*incX];
        x[i*incX] = y[i*incY];
        y[i*incY] = tmp;
    }
}

double
damax(int n, const double *x, int incX)
{
    int     i;
    double  result = 0;

    for (i=0; i<n; ++i) {
        if (fabs(x[i*incX])>result) {
            result = fabs(x[i*incX]);
        }
    }
    return result;
}

double
dgenrm1(int m, int n, const double *A, int incRowA, int incColA)
{
    int     i, j;
    double  result = 0;

    for (j=0; j<n; ++j) {
        double sum = 0;
        for (i=0; i<m; ++i) {
            sum += fabs(A[i*incRowA+j*incColA]);
        }
        if (sum>result) {
            result = sum;
        }
    }
    return result;
}

//-- GEMV implementations ------------------------------------------------------

void
dgemv_col(int m, int n, double alpha,
          const double *A, int incRowA, int incColA,
          const double *x, int incX,
          double beta,
          double *y, int incY)
{
    int j;

    dscal(m, beta, y, incY);

    for (j=0; j<n; ++j) {
        daxpy(m, alpha*x[j*incX], &A[j*incColA], incRowA, y, incY);
    }
}

void
dgemv_row(int m, int n, double alpha,
          const double *A, int incRowA, int incColA,
          const double *x, int incX,
          double beta,
          double *y, int incY)
{
    int i;

    for (i=0; i<m; ++i) {
        double dot = ddot(n, &A[i*incRowA], incColA, x, incX);

        y[i*incY] = beta*y[i*incY] + alpha*dot;
    }
}

//------------------------------------------------------------------------------

#ifndef DIM_M
#define DIM_M 5
#endif

#ifndef DIM_N
#define DIM_N 6
#endif

#ifndef ROWMAJOR
#define ROWMAJOR 0
#endif

#if (ROWMAJOR==1)
#   define INCROW_A  DIM_N
#   define INCCOL_A  1
#else
#   define INCROW_A  1
#   define INCCOL_A  DIM_M
#endif

#ifndef INC_X
#define INC_X 1
#endif

#ifndef INC_Y
#define INC_Y 1
#endif

#ifndef ALPHA
#define ALPHA 2
#endif

#ifndef BETA
#define BETA 3
#endif


//
//  A, x, y_ will get initialized with initGeMatrix
//
double A[DIM_M*DIM_N];
double x[INC_X*DIM_N];
double y_[DIM_M];

//
// If BETA is not zero the result of BETA*y + ALPHA * A *x depends on the
// initial value of y.  We initialize y with y_ before each call of a gemv
// implementation.  So the results are always the same.
//
double y[INC_Y*DIM_M];

int
main()
{
    initGeMatrix(DIM_M, DIM_N, A, INCROW_A, INCCOL_A);
    initGeMatrix(DIM_N, 1, x, INC_X, 0);
    initGeMatrix(DIM_M, 1, y_, 1, 0);

    printf("A =\n");
    printGeMatrix(DIM_M, DIM_N, A, INCROW_A, INCCOL_A);

    printf("x =\n");
    printGeMatrix(DIM_N, 1, x, INC_X, 0);

    printf("y =\n");
    printGeMatrix(DIM_M, 1, y_, INC_Y, 0);

    //
    //  Aufruf von gemv_col
    //
    printf("gemv_col: %10.4lf * y + %10.4lf * A * x =\n\n",
           (double)BETA, (double)ALPHA);

    dcopy(DIM_M, y_, 1, y, INC_Y);
    dgemv_col(DIM_M, DIM_N,
              ALPHA,
              A, INCROW_A, INCCOL_A,
              x, INC_X,
              BETA,
              y, INC_Y);

    printGeMatrix(DIM_M, 1, y, INC_Y, 0);

    //
    //  Aufruf von gemv_row
    //
    printf("gemv_row: %10.4lf * y + %10.4lf * A * x =\n\n",
           (double)BETA, (double)ALPHA);

    dcopy(DIM_M, y_, 1, y, INC_Y);
    dgemv_row(DIM_M, DIM_N,
              ALPHA,
              A, INCROW_A, INCCOL_A,
              x, INC_X,
              BETA,
              y, INC_Y);

    printGeMatrix(DIM_M, 1, y, INC_Y, 0);


    return 0;
}

Übersetzten und ausführen

$shell> gcc -Wall gemv_test_sol.c
$shell> ./a.out
A =
    1.0000     2.0000     3.0000     4.0000     5.0000     6.0000 
    7.0000     8.0000     9.0000    10.0000    11.0000    12.0000 
   13.0000    14.0000    15.0000    16.0000    17.0000    18.0000 
   19.0000    20.0000    21.0000    22.0000    23.0000    24.0000 
   25.0000    26.0000    27.0000    28.0000    29.0000    30.0000 


x =
    1.0000 
    2.0000 
    3.0000 
    4.0000 
    5.0000 
    6.0000 


y =
    1.0000 
    2.0000 
    3.0000 
    4.0000 
    5.0000 


gemv_col:     3.0000 * y +     2.0000 * A * x =

  185.0000 
  440.0000 
  695.0000 
  950.0000 
 1205.0000 


gemv_row:     3.0000 * y +     2.0000 * A * x =

  185.0000 
  440.0000 
  695.0000 
  950.0000 
 1205.0000 
$shell> 

Benchmark

Übersetzen

$shell> gcc -Wall -O1 -DROWMAJOR=1 -o gemv_bench_row_O1 gemv_bench_sol.c
$shell> gcc -Wall -O2 -DROWMAJOR=1 -o gemv_bench_row_O2 gemv_bench_sol.c
$shell> gcc -Wall -O3 -DROWMAJOR=1 -o gemv_bench_row_O3 gemv_bench_sol.c
$shell> gcc -Wall -Ofast -DROWMAJOR=1 -o gemv_bench_row_Ofast gemv_bench_sol.c
$shell> gcc -Wall -O1 -DROWMAJOR=0 -o gemv_bench_col_O1 gemv_bench_sol.c
$shell> gcc -Wall -O2 -DROWMAJOR=0 -o gemv_bench_col_O2 gemv_bench_sol.c
$shell> gcc -Wall -O3 -DROWMAJOR=0 -o gemv_bench_col_O3 gemv_bench_sol.c
$shell> gcc -Wall -Ofast -DROWMAJOR=0 -o gemv_bench_col_Ofast gemv_bench_sol.c
$shell> 

Ausführen

$shell> gemv_bench_row_O1 > report.gemv.row_O1
$shell> gemv_bench_row_O2 > report.gemv.row_O2
$shell> gemv_bench_row_O3 > report.gemv.row_O3
$shell> gemv_bench_row_Ofast > report.gemv.row_Ofast
$shell> gemv_bench_col_O1 > report.gemv.col_O1
$shell> gemv_bench_col_O2 > report.gemv.col_O2
$shell> gemv_bench_col_O3 > report.gemv.col_O3
$shell> gemv_bench_col_Ofast > report.gemv.col_Ofast
$shell> 

Plots erzeugen

$shell> gnuplot plot.gemv_row
$shell> gnuplot plot.gemv_col
$shell> 

Plots anschauen

ColMajor Benchmarks

RowMajor Benchmarks