Possible Solution

Content

Source code

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <sys/times.h>
#include <unistd.h>
#include <mkl_types.h>

#ifndef MIN_M
#define MIN_M 100
#endif

#ifndef MIN_N
#define MIN_N 100
#endif

#ifndef MAX_M
#define MAX_M 1500
#endif

#ifndef MAX_N
#define MAX_N 1500
#endif

#ifndef INCX
#define INCX 1
#endif

#ifndef INCY
#define INCY 1
#endif

#ifndef ALPHA
#define ALPHA 1.5
#endif

#ifndef BETA
#define BETA 1.5
#endif

#ifndef T_MIN
#define T_MIN 5
#endif


double A[MAX_M*MAX_N];
double X[MAX_N*INCX];
double Y[MAX_M*INCY];
double Y1[MAX_M*INCY];
double Y2[MAX_M*INCY];
double Y3[MAX_M*INCY];
double Y4[MAX_M*INCY];
double Y5[MAX_M*INCY];

double
walltime()
{
   struct tms    ts;
   static double ClockTick=0.0;

   if (ClockTick==0.0) {
        ClockTick = 1.0 / ((double) sysconf(_SC_CLK_TCK));
   }
   return ((double) times(&ts)) * ClockTick;
}

void
initMatrix(size_t m, size_t n, double *A, size_t incRowA, size_t incColA)
{
    for (size_t j=0; j<n; ++j) {
        for (size_t i=0; i<m; ++i) {
            A[i*incRowA+j*incColA] = ((double)rand() - RAND_MAX/2)*200/RAND_MAX;
        }
    }
}

void
copyMatrix(size_t m, size_t n,
           const double *A, size_t incRowA, size_t incColA,
           double *B, size_t incRowB, size_t incColB)
{
    for (size_t j=0; j<n; ++j) {
        for (size_t i=0; i<m; ++i) {
            B[i*incRowB+j*incColB] = A[i*incRowA+j*incColA];
        }
    }
}

double
asumDiffMatrix(size_t m, size_t n,
               const double *A, size_t incRowA, size_t incColA,
               double *B, size_t incRowB, size_t incColB)
{
    double asum = 0;

    for (size_t j=0; j<n; ++j) {
        for (size_t i=0; i<m; ++i) {
            asum += fabs(B[i*incRowB+j*incColB] - A[i*incRowA+j*incColA]);
        }
    }
    return asum;
}

//------------------------------------------------------------------------------

void
dgemv(const char *trans,
      const MKL_INT *m, const MKL_INT *n, const double *alpha,
      const double *A, const MKL_INT *ldA, const double *x,
      const MKL_INT *incX,
      const double *beta, double *y, const MKL_INT *incY);

void
dgemv_mkl(MKL_INT m, MKL_INT n,
          double alpha,
          const double *A, MKL_INT incRowA, MKL_INT incColA,
          const double *x, MKL_INT incX,
          double beta,
          double *y, MKL_INT incY)
{
    MKL_INT ldA   = (incRowA==1) ? incColA : incRowA;
    char    trans = (incRowA==1) ? 'N' : 'T';
    MKL_INT M     = (incRowA==1) ? m : n;
    MKL_INT N     = (incRowA==1) ? n : m;

    dgemv(&trans, &M, &N, &alpha, A, &ldA, x, &incX, &beta, y, &incY);
}

//------------------------------------------------------------------------------

void
dscal_ulm(size_t n, double alpha, double *x, ptrdiff_t incX)
{
    if (alpha==1) {
        return;
    }
    if (alpha==0) {
        for (size_t i=0; i<n; ++i) {
            x[i*incX] = 0;
        }
    } else {
        for (size_t i=0; i<n; ++i) {
            x[i*incX] *= alpha;
        }
    }
}

double
ddot_ulm(size_t n,
         const double *x, ptrdiff_t incX,
         const double *y, ptrdiff_t incY)
{
    double result = 0;

    for (size_t i=0; i<n; ++i) {
        result += x[i*incX]*y[i*incY];
    }
    return result;
}

void
daxpy_ulm(size_t n, double alpha,
          const double *x, ptrdiff_t incX,
          double *y, ptrdiff_t incY)
{
    if (alpha==0) {
        return;
    }
    for (size_t i=0; i<n; ++i) {
        y[i*incY] += alpha*x[i*incX];
    }
}


void
dgemv_ulm(size_t m, size_t n,
          double alpha,
          const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
          const double *x, ptrdiff_t incX,
          double beta,
          double *y, ptrdiff_t incY)
{
    dscal_ulm(m, beta, y, incY);

    if (alpha==0) {
        return;
    }

    if (incRowA > incColA) {
        for (size_t i=0; i<m; ++i) {
            y[i*incY] += alpha*ddot_ulm(n, &A[i*incRowA], incColA, x, incX);
        }
    } else  {
        for (size_t j=0; j<n; ++j) {
            daxpy_ulm(m, alpha*x[j*incX], &A[j*incColA], incRowA, y, incY);
        }
    }
}

//------------------------------------------------------------------------------

#ifndef COLMAJOR
//#define COLMAJOR 1
#define COLMAJOR 0
#endif

int
main()
{
    size_t runs, incRowA, incColA;
    double t0, t1, t2;
    double diff2;
    double alpha = ALPHA;
    double beta  = BETA;

    initMatrix(MAX_M, MAX_N, A, 1, MAX_M);
    initMatrix(MAX_N, 1, X, INCX, 1);
    initMatrix(MAX_M, 1, Y, INCY, 1);

    printf("# COLMAJOR    = %d\n", COLMAJOR);
    printf("# T_MIN       = %d\n", T_MIN);
    printf("#RUN    M     N  INCROW  INCCOL");
    printf("    GEMV_MKL    GEMV_ULM");
    printf("    GEMV_MKL    GEMV_ULM");
    printf("       DIFF2");
    printf("\n");
    printf("#                              ");
    printf("    (t in s)    (t in s)");
    printf("    (MFLOPS)    (MFLOPS)");
    printf("           ");
    printf("\n");

    for (size_t i=0, m=MIN_M, n=MIN_N; m<=MAX_M && n<=MAX_N;
	    ++i, m+=100, n+=100) {

        if (COLMAJOR) {
            incRowA = 1;
            incColA = m;
        } else {
            incRowA = n;
            incColA = 1;
        }

        t1   = 0;
        runs = 0;
        do {
            copyMatrix(MAX_M, 1, Y, INCY, 1, Y1, INCY, 1);
            t0 = walltime();
            dgemv_mkl(m, n, alpha,
                      A, incRowA, incColA,
                      X, INCX,
                      beta,
                      Y1, INCY);
            t1 += walltime() - t0;
            ++runs;
        } while (t1<T_MIN);
        t1 /= runs;

        t2   = 0;
        runs = 0;
        do {
            copyMatrix(MAX_M, 1, Y, INCY, 1, Y2, INCY, 1);
            t0 = walltime();
            dgemv_ulm(m, n, alpha,
                      A, incRowA, incColA,
                      X, INCX,
                      beta,
                      Y2, INCY);
            t2 += walltime() - t0;
            ++runs;
        } while (t2<T_MIN);
        t2 /= runs;
        diff2 = asumDiffMatrix(m, 1, Y1, INCY, 1, Y2, INCY, 1);

        printf("%3ld %5ld %5ld %7ld %7ld ", i, m, n, incRowA, incColA);
        printf("%11.4lf %11.4lf ", t1, t2);
        printf("%11.4lf ", 2*(m/1000.0)*(n/1000.0)/t1);
        printf("%11.4lf ", 2*(m/1000.0)*(n/1000.0)/t2);
        printf("%11.4lf ", diff2);
        printf("\n");
    }
}

Test run

heim$ make
gcc -DCOLMAJOR=0 -I/opt/intel/compilers_and_libraries/linux/mkl/include -DMKL_ILP64 -std=c99 -Wall -Ofast -L/opt/intel/compilers_and_libraries/linux/lib/intel64 -L/opt/intel/compilers_and_libraries/linux/mkl/lib/intel64 -Wl,-rpath -Wl,/opt/intel/compilers_and_libraries/linux/lib/intel64 -Wl,-rpath -Wl,/opt/intel/compilers_and_libraries/linux/mkl/lib/intel64 -lmkl_intel_ilp64 -lmkl_core -lmkl_sequential -lm -lpthread -o gemv_rowmajor gemv.c
gcc  -DCOLMAJOR=1 -I/opt/intel/compilers_and_libraries/linux/mkl/include -DMKL_ILP64 -std=c99 -Wall -Ofast -L/opt/intel/compilers_and_libraries/linux/lib/intel64 -L/opt/intel/compilers_and_libraries/linux/mkl/lib/intel64 -Wl,-rpath -Wl,/opt/intel/compilers_and_libraries/linux/lib/intel64 -Wl,-rpath -Wl,/opt/intel/compilers_and_libraries/linux/mkl/lib/intel64 -lmkl_intel_ilp64 -lmkl_core -lmkl_sequential -lm -lpthread -o gemv_colmajor gemv.c
heim$ ./gemv_colmajor > gemv_colmajor.dat
heim$ ./gemv_rowmajor > gemv_rowmajor.dat
heim$ cat gemv_colmajor.dat
# COLMAJOR    = 1
# T_MIN       = 5
#RUN    M     N  INCROW  INCCOL    GEMV_MKL    GEMV_ULM    GEMV_MKL    GEMV_ULM       DIFF2
#                                  (t in s)    (t in s)    (MFLOPS)    (MFLOPS)           
  0   100   100       1     100      0.0000      0.0000   8669.1120   3936.8383      0.0000 
  1   200   200       1     200      0.0000      0.0000   8931.2415   4650.1876      0.0000 
  2   300   300       1     300      0.0000      0.0000   8584.8120   4907.1240      0.0000 
  3   400   400       1     400      0.0000      0.0001   8777.7086   5053.2535      0.0000 
  4   500   500       1     500      0.0001      0.0001   8649.0000   5094.9000      0.0000 
  5   600   600       1     600      0.0001      0.0001   8820.1440   5061.2695      0.0000 
  6   700   700       1     700      0.0001      0.0002   7939.7640   4919.6000      0.0000 
  7   800   800       1     800      0.0002      0.0003   7069.3812   4685.0560      0.0000 
  8   900   900       1     900      0.0003      0.0004   5902.4910   4434.4671      0.0000 
  9  1000  1000       1    1000      0.0004      0.0005   5428.8000   4160.8000      0.0000 
 10  1100  1100       1    1100      0.0005      0.0006   5063.6080   4170.9980      0.0000 
 11  1200  1200       1    1200      0.0006      0.0007   4974.1796   4119.5520      0.0000 
 12  1300  1300       1    1300      0.0007      0.0008   4763.0960   4100.5269      0.0000 
 13  1400  1400       1    1400      0.0009      0.0010   4306.5120   3964.6880      0.0000 
 14  1500  1500       1    1500      0.0010      0.0011   4295.7000   3996.9000      0.0000 
heim$ cat gemv_rowmajor.dat
# COLMAJOR    = 0
# T_MIN       = 5
#RUN    M     N  INCROW  INCCOL    GEMV_MKL    GEMV_ULM    GEMV_MKL    GEMV_ULM       DIFF2
#                                  (t in s)    (t in s)    (MFLOPS)    (MFLOPS)           
  0   100   100     100       1      0.0000      0.0000   8464.4960   4336.8400      0.0000 
  1   200   200     200       1      0.0000      0.0000   8541.9360   4775.8403      0.0000 
  2   300   300     300       1      0.0000      0.0000   8447.9040   4728.4671      0.0000 
  3   400   400     400       1      0.0000      0.0001   8638.9142   4815.6487      0.0000 
  4   500   500     500       1      0.0001      0.0001   8559.9800   4754.7904      0.0000 
  5   600   600     600       1      0.0001      0.0002   8389.7964   4717.2960      0.0000 
  6   700   700     700       1      0.0001      0.0002   8078.3360   4681.4600      0.0000 
  7   800   800     800       1      0.0002      0.0003   6930.9440   4406.6747      0.0000 
  8   900   900     900       1      0.0003      0.0004   6119.7120   4197.4200      0.0000 
  9  1000  1000    1000       1      0.0004      0.0005   5330.4000   4051.6000      0.0000 
 10  1100  1100    1100       1      0.0005      0.0006   4773.6920   3993.0000      0.0000 
 11  1200  1200    1200       1      0.0006      0.0007   5059.5840   4028.5440      0.0000 
 12  1300  1300    1300       1      0.0008      0.0009   4479.8520   3964.0640      0.0000 
 13  1400  1400    1400       1      0.0009      0.0010   4432.7360   3967.0400      0.0000 
 14  1500  1500    1500       1      0.0010      0.0011   4392.2156   3973.6527      0.0000 
heim$ gnuplot gemv_colmajor.plot
heim$ gnuplot gemv_rowmajor.plot
heim$ 

Benchmark