Content

Lösungsvorschlag

Test nach Augenmaß

#include <stdio.h>
#include <math.h>

//-- setup and print matrices --------------------------------------------------

void
initGeMatrix(int m, int n, double *A, int incRowA, int incColA)
{
    int i, j;

    for (i=0; i<m; ++i) {
        for (j=0; j<n; ++j) {
            A[i*incRowA+j*incColA] = i*n + j + 1;
        }
    }
}

void
printGeMatrix(int m, int n, const double *A, int incRowA, int incColA)
{
    int i, j;

    for (i=0; i<m; ++i) {
        for (j=0; j<n; ++j) {
            printf("%10.4lf ", A[i*incRowA+j*incColA]);
        }
        printf("\n");
    }
    printf("\n\n");
}

//-- some BLAS Level 1 procedures and functions --------------------------------

double
ddot(int n, const double *x, int incX, const double *y, int incY)
{
    int     i;
    double  alpha = 0;

    for (i=0; i<n; ++i) {
        alpha += x[i*incX]*y[i*incY];

    }
    return alpha;
}

void
daxpy(int n, double alpha, const double *x, int incX, double *y, int incY)
{
    int i;

    if (alpha==0) {
        return;
    }
    for (i=0; i<n; ++i) {
        y[i*incY] += alpha*x[i*incX];
    }
}

void
dscal(int n, double alpha, double *x, int incX)
{
    int i;

    if (alpha==1) {
        return;
    }
    for (i=0; i<n; ++i) {
        x[i*incX] *= alpha;
    }
}

void
dcopy(int n, const double *x, int incX, double *y, int incY)
{
    int i;

    for (i=0; i<n; ++i) {
        y[i*incY] = x[i*incX];
    }
}

void
dswap(int n, double *x, int incX, double *y, int incY)
{
    int i;

    for (i=0; i<n; ++i) {
        double tmp;

        tmp = x[i*incX];
        x[i*incX] = y[i*incY];
        y[i*incY] = tmp;
    }
}

double
damax(int n, const double *x, int incX)
{
    int     i;
    double  result = 0;

    for (i=0; i<n; ++i) {
        if (fabs(x[i*incX])>result) {
            result = fabs(x[i*incX]);
        }
    }
    return result;
}

double
dgenrm1(int m, int n, const double *A, int incRowA, int incColA)
{
    int     i, j;
    double  result = 0;

    for (j=0; j<n; ++j) {
        double sum = 0;
        for (i=0; i<m; ++i) {
            sum += fabs(A[i*incRowA+j*incColA]);
        }
        if (sum>result) {
            result = sum;
        }
    }
    return result;
}

//-- Fused BLAS Level 1 procedures and functions -------------------------------

#ifndef DGEMV_MxBS
#define DGEMV_MxBS 4
#endif

void
dgemv_mxBS(int m, double alpha,
           const double *A, int incRowA, int incColA,
           const double *x, int incX,
           double *y, int incY)
{
    int i, j;

    if (alpha==0) {
        return;
    }
    for (i=0; i<m; ++i) {
        double dot = 0;
        for (j=0; j<DGEMV_MxBS; ++j) {
            dot += A[i*incRowA+j*incColA]*x[j*incX];
        }
        y[i*incY] += alpha*dot;
    }
}

#ifndef DGEMV_BSxN
#define DGEMV_BSxN 4
#endif

void
dgemv_BSxn(int n, double alpha,
           const double *A, int incRowA, int incColA,
           const double *x, int incX,
           double beta,
           double *y, int incY)
{
    int i, j;

    if (alpha==0) {
        return;
    }
    if (beta!=1) {
        for (i=0; i<DGEMV_BSxN; ++i) {
            y[i*incY] *= beta;
        }
    }
    for (j=0; j<n; ++j) {
        for (i=0; i<DGEMV_BSxN; ++i) {
            y[i*incY] += alpha*A[i*incRowA+j*incColA]*x[j*incX];
        }
    }
}

//-- GEMV implementations ------------------------------------------------------

void
dgemv_unblk(int m, int n, double alpha,
            const double *A, int incRowA, int incColA,
            const double *x, int incX,
            double beta,
            double *y, int incY)
{
    if (incRowA<incColA) {
        int j;

        dscal(m, beta, y, incY);

        for (j=0; j<n; ++j) {
            daxpy(m, alpha*x[j*incX], &A[j*incColA], incRowA, y, incY);
        }
    } else {
        int i;

        for (i=0; i<m; ++i) {
            y[i*incY] *= beta;
            y[i*incY] += alpha*ddot(n, &A[i*incRowA], incColA, x, incX);
        }
    }
}

void
dgemv_blk_col(int m, int n, double alpha,
              const double *A, int incRowA, int incColA,
              const double *x, int incX,
              double beta,
              double *y, int incY)
{
    int bs = DGEMV_MxBS;
    int j;

    dscal(m, beta, y, incY);

    for (j=0; j+bs<=n; j+=bs) {
        dgemv_mxBS(m, alpha,
                   &A[j*incColA], incRowA, incColA,
                   &x[j*incX], incX,
                   y, incY);
    }
    dgemv_unblk(m, n-j, alpha,
                &A[j*incColA], incRowA, incColA,
                &x[j*incX], incX,
                1.0,
                y, incY);
}

void
dgemv_blk_row(int m, int n, double alpha,
              const double *A, int incRowA, int incColA,
              const double *x, int incX,
              double beta,
              double *y, int incY)
{
    int bs = DGEMV_BSxN;
    int i;

    for (i=0; i+bs<=m; i+=bs) {
        dgemv_BSxn(n, alpha,
                   &A[i*incRowA], incRowA, incColA,
                   x, incX,
                   beta,
                   &y[i*incY], incY);
    }
    dgemv_unblk(m-i, n, alpha,
                &A[i*incRowA], incRowA, incColA,
                x, incX,
                beta,
                &y[i*incY], incY);
}

//------------------------------------------------------------------------------

#ifndef DIM_M
#define DIM_M 5
#endif

#ifndef DIM_N
#define DIM_N 7
#endif

#ifndef ROWMAJOR
#define ROWMAJOR 0
#endif

#if (ROWMAJOR==1)
#   define INCROW_A  DIM_N
#   define INCCOL_A  1
#else
#   define INCROW_A  1
#   define INCCOL_A  DIM_M
#endif

#ifndef INC_X
#define INC_X 1
#endif

#ifndef INC_Y
#define INC_Y 1
#endif

#ifndef ALPHA
#define ALPHA 2
#endif

#ifndef BETA
#define BETA 3
#endif


//
//  A, x, y_ will get initialized with initGeMatrix
//
double A[DIM_M*DIM_N];
double x[INC_X*DIM_N];
double y_[DIM_M];

//
// If BETA is not zero the result of BETA*y + ALPHA * A *x depends on the
// initial value of y.  We initialize y with y_ before each call of a gemv
// implementation.  So the results are always the same.
//
double y[INC_Y*DIM_M];

int
main()
{
    initGeMatrix(DIM_M, DIM_N, A, INCROW_A, INCCOL_A);
    initGeMatrix(DIM_N, 1, x, INC_X, 0);
    initGeMatrix(DIM_M, 1, y_, 1, 0);

    printf("A =\n");
    printGeMatrix(DIM_M, DIM_N, A, INCROW_A, INCCOL_A);

    printf("x =\n");
    printGeMatrix(DIM_N, 1, x, INC_X, 0);

    printf("y =\n");
    printGeMatrix(DIM_M, 1, y_, INC_Y, 0);

    //
    //  Aufruf von gemv_unblk
    //
    printf("gemv_unblk: %10.4lf * y + %10.4lf * A * x =\n\n",
           (double)BETA, (double)ALPHA);

    dcopy(DIM_M, y_, 1, y, INC_Y);
    dgemv_unblk(DIM_M, DIM_N,
                ALPHA,
                A, INCROW_A, INCCOL_A,
                x, INC_X,
                BETA,
                y, INC_Y);

    printGeMatrix(DIM_M, 1, y, INC_Y, 0);

    //
    //  Aufruf von gemv_blk_col
    //
    printf("gemv_blk_col: %10.4lf * y + %10.4lf * A * x =\n\n",
           (double)BETA, (double)ALPHA);

    dcopy(DIM_M, y_, 1, y, INC_Y);
    dgemv_blk_col(DIM_M, DIM_N,
                  ALPHA,
                  A, INCROW_A, INCCOL_A,
                  x, INC_X,
                  BETA,
                  y, INC_Y);

    printGeMatrix(DIM_M, 1, y, INC_Y, 0);

    //
    //  Aufruf von gemv_blk_row
    //
    printf("gemv_blk_row: %10.4lf * y + %10.4lf * A * x =\n\n",
           (double)BETA, (double)ALPHA);

    dcopy(DIM_M, y_, 1, y, INC_Y);
    dgemv_blk_row(DIM_M, DIM_N,
                  ALPHA,
                  A, INCROW_A, INCCOL_A,
                  x, INC_X,
                  BETA,
                  y, INC_Y);

    printGeMatrix(DIM_M, 1, y, INC_Y, 0);


    return 0;
}

Übersetzten und ausführen

$shell> gcc -Wall gemv_test_sol.c
$shell> ./a.out
A =
    1.0000     2.0000     3.0000     4.0000     5.0000     6.0000     7.0000 
    8.0000     9.0000    10.0000    11.0000    12.0000    13.0000    14.0000 
   15.0000    16.0000    17.0000    18.0000    19.0000    20.0000    21.0000 
   22.0000    23.0000    24.0000    25.0000    26.0000    27.0000    28.0000 
   29.0000    30.0000    31.0000    32.0000    33.0000    34.0000    35.0000 


x =
    1.0000 
    2.0000 
    3.0000 
    4.0000 
    5.0000 
    6.0000 
    7.0000 


y =
    1.0000 
    2.0000 
    3.0000 
    4.0000 
    5.0000 


gemv_unblk:     3.0000 * y +     2.0000 * A * x =

  283.0000 
  678.0000 
 1073.0000 
 1468.0000 
 1863.0000 


gemv_blk_col:     3.0000 * y +     2.0000 * A * x =

  283.0000 
  678.0000 
 1073.0000 
 1468.0000 
 1863.0000 


gemv_blk_row:     3.0000 * y +     2.0000 * A * x =

  283.0000 
  678.0000 
 1073.0000 
 1468.0000 
 1863.0000 
$shell> 

Benchmark

Übersetzen

$shell> gcc -Wall -Ofast -DROWMAJOR=1 -DDGEMV_BSxN=2 -o gemv_bench_row_bs2 gemv_bench_sol.c
$shell> gcc -Wall -Ofast -DROWMAJOR=1 -DDGEMV_BSxN=3 -o gemv_bench_row_bs3 gemv_bench_sol.c
$shell> gcc -Wall -Ofast -DROWMAJOR=1 -DDGEMV_BSxN=4 -o gemv_bench_row_bs4 gemv_bench_sol.c
$shell> gcc -Wall -Ofast -DROWMAJOR=1 -DDGEMV_BSxN=5 -o gemv_bench_row_bs5 gemv_bench_sol.c
$shell> gcc -Wall -Ofast -DROWMAJOR=1 -DDGEMV_BSxN=6 -o gemv_bench_row_bs6 gemv_bench_sol.c
$shell> gcc -Wall -Ofast -DROWMAJOR=0 -DDGEMV_MxBS=2 -o gemv_bench_col_bs2 gemv_bench_sol.c
$shell> gcc -Wall -Ofast -DROWMAJOR=0 -DDGEMV_MxBS=3 -o gemv_bench_col_bs3 gemv_bench_sol.c
$shell> gcc -Wall -Ofast -DROWMAJOR=0 -DDGEMV_MxBS=4 -o gemv_bench_col_bs4 gemv_bench_sol.c
$shell> gcc -Wall -Ofast -DROWMAJOR=0 -DDGEMV_MxBS=5 -o gemv_bench_col_bs5 gemv_bench_sol.c
$shell> gcc -Wall -Ofast -DROWMAJOR=0 -DDGEMV_MxBS=6 -o gemv_bench_col_bs6 gemv_bench_sol.c
$shell> 

Ausführen

$shell> ./gemv_bench_row_bs2 > report.gemv.row_bs2
$shell> ./gemv_bench_row_bs3 > report.gemv.row_bs3
$shell> ./gemv_bench_row_bs4 > report.gemv.row_bs4
$shell> ./gemv_bench_row_bs5 > report.gemv.row_bs5
$shell> ./gemv_bench_row_bs6 > report.gemv.row_bs6
$shell> ./gemv_bench_col_bs2 > report.gemv.col_bs2
$shell> ./gemv_bench_col_bs3 > report.gemv.col_bs3
$shell> ./gemv_bench_col_bs4 > report.gemv.col_bs4
$shell> ./gemv_bench_col_bs5 > report.gemv.col_bs5
$shell> ./gemv_bench_col_bs6 > report.gemv.col_bs6
$shell> 

Plots erzeugen

$shell> gnuplot plot.gemv_col
$shell> gnuplot plot.gemv_row
$shell> 

Plots anschauen

ColMajor Benchmarks

RowMajor Benchmarks