Lösungsvorschlag

//
// BLAS Level 1
//

#include <math.h>

double
ddot(int n, const double *x, int incX, const double *y, int incY)
{
    int     i;
    double  alpha = 0;

    for (i=0; i<n; ++i) {
        alpha += x[i*incX]*y[i*incY];

    }
    return alpha;
}

void
daxpy(int n, double alpha, const double *x, int incX, double *y, int incY)
{
    int i;

    if (alpha==0) {
        return;
    }
    for (i=0; i<n; ++i) {
        y[i*incY] += alpha*x[i*incX];
    }
}

void
dscal(int n, double alpha, double *x, int incX)
{
    int i;

    if (alpha==1) {
        return;
    }
    for (i=0; i<n; ++i) {
        x[i*incX] *= alpha;
    }
}

void
dcopy(int n, const double *x, int incX, double *y, int incY)
{
    int i;

    for (i=0; i<n; ++i) {
        y[i*incY] = x[i*incX];
    }
}

void
dswap(int n, double *x, int incX, double *y, int incY)
{
    int i;

    for (i=0; i<n; ++i) {
        double tmp;

        tmp = x[i*incX];
        x[i*incX] = y[i*incY];
        y[i*incY] = tmp;
    }
}

double
damax(int n, const double *x, int incX)
{
    int     i;
    double  result = 0;

    for (i=0; i<n; ++i) {
        if (fabs(x[i*incX])>result) {
            result = fabs(x[i*incX]);
        }
    }
    return result;
}

//
//  BLAS Level 1 extensions
//

void
dgecopy(int m, int n,
        const double *X, int incRowX, int incColX,
        double *Y, int incRowY, int incColY)
{
    int i, j;

    for (i=0; i<m; ++i) {
        for (j=0; j<n; ++j) {
            Y[i*incRowY+j*incColY] = X[i*incRowX+j*incColX];
        }
    }
}

void
dgescal(int m, int n, double alpha,
        double *X, int incRowX, int incColX)
{
    // ... your code here ...
}

void
dgeaxpy(int m, int n, double alpha,
        const double *X, int incRowX, int incColX,
        double *Y, int incRowY, int incColY)
{
    int i, j;

    if (alpha==0 || m==0 || n==0) {
        return;
    }
    for (j=0; j<n; ++j) {
        for (i=0; i<m; ++i) {
            Y[i*incRowY+j*incColY] += alpha*X[i*incRowX+j*incColX];
        }
    }
}

double
dgenrm1(int m, int n, const double *A, int incRowA, int incColA)
{
    int     i, j;
    double  result = 0;

    for (j=0; j<n; ++j) {
        double sum = 0;
        for (i=0; i<m; ++i) {
            sum += fabs(A[i*incRowA+j*incColA]);
        }
        if (sum>result) {
            result = sum;
        }
    }
    return result;
}

double
dtrnrm1(int m, int n,  int unitDiag, int lower,
        const double *A, int incRowA, int incColA)
{
    int     i, j;
    double  result = 0;

    for (j=0; j<n; ++j) {
        double sum = 0;
        for (i=0; i<m; ++i) {
            if (unitDiag && (i==j)) {
                sum += 1.0;
            } else if ((lower && (i>=j)) || (!lower && (i<=j))) {
                sum += fabs(A[i*incRowA+j*incColA]);
            }
        }
        if (sum>result) {
            result = sum;
        }
    }
    return result;
}


Benchmark

$shell> make clean
rm -f ../ulmblas.a  lu.o  level3.o  level1.o  level2.o
$shell> 
$shell> pwd
/home/numerik/hpc/ss16/session07/hpc_project_sol/ulmblas
$shell> make
gcc -Wall -Ofast -I ..   -c -o lu.o lu.c
gcc -Wall -Ofast -I ..   -c -o level3.o level3.c
gcc -Wall -Ofast -I ..   -c -o level1.o level1.c
gcc -Wall -Ofast -I ..   -c -o level2.o level2.c
ar cru ../ulmblas.a  lu.o  level3.o  level1.o  level2.o
ranlib ../ulmblas.a
$shell> 
$shell> pwd
/home/numerik/hpc/ss16/session07/hpc_project_sol/bench
$shell> make
gcc -Wall -Ofast -I ..   -c -o aux.o aux.c
gcc -Wall -Ofast -I ..   -c -o refblas.o refblas.c
gcc -Wall -Ofast -I ..   -c -o errbound.o errbound.c
gcc -Wall -Ofast -I ..  aux.o  refblas.o  errbound.o -o bench_gemm_vv bench_gemm_vv.c ../ulmblas.a
gcc -Wall -Ofast -I ..  aux.o  refblas.o  errbound.o -o bench_gemm_vm bench_gemm_vm.c ../ulmblas.a
gcc -Wall -Ofast -I ..  aux.o  refblas.o  errbound.o -o bench_gemm_mv bench_gemm_mv.c ../ulmblas.a
rm refblas.o aux.o errbound.o
$shell> ./bench_gemm_vm > report.gemv_vm_ccc
$shell> ./bench_gemm_mv > report.gemv_mv_ccc
$shell> ./bench_gemm_vv > report.gemv_vv_ccc
$shell> 
set terminal svg size 1200, 500
set output "bench.gemm_ccc.svg"
set xlabel "Matrix Dimension (M=N=K)"
set ylabel "MFLOPS"
set title "GEMM (Unblocked): A,B, C Col-Major"
set key outside
set pointsize 1.25
plot "report.gemv_mv_ccc" using 1:8  with linespoints lt  1 lw 1 title "dgemm_jil", \
     "report.gemv_mv_ccc" using 1:11 with linespoints lt  2 lw 1 title "dgemm_jli", \
     "report.gemv_vm_ccc" using 1:8 with  linespoints lt  3 lw 1 title "dgemm_ijl", \
     "report.gemv_vm_ccc" using 1:11 with linespoints lt  4 lw 1 title "dgemm_ilj", \
     "report.gemv_vv_ccc" using 1:8 with  linespoints lt  5 lw 1 title "dgemm_lij", \
     "report.gemv_vv_ccc" using 1:11 with linespoints lt  6 lw 1 title "dgemm_lji"
$shell> gnuplot plot.gemm_unblk
$shell>