Matrix product

Content

In BLAS (Basic Linear Algebra Subprograms) the general matrix product performs

\[C \leftarrow \beta C + \alpha A B\]

which is named GEMM (general matrix-matrix product) and where

\[\alpha \in \mathbb{K},\quad \beta \in \mathbb{K},\quad A \in \mathbb{K}^{m\times k},\quad B \in \mathbb{K}^{k\times n},\quad\text{and}\quad C \in \mathbb{K}^{m\times n}\]

where \(\mathbb{K}\) represents a floating point type, i.e. double.

Special cases

Exercise

Below we provide a simple benchmark stub for the GEMM operation. It already contains an implementation that traverses matrices \(A\), \(B\) and \(C\) in row major order:

Stub for the GEMM benchmark

#include <math.h>
// TODO: Add missing header files
#include <stdio.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>


#ifndef MINDIM_M
#define MINDIM_M    100
#endif

#ifndef MINDIM_N
#define MINDIM_N    100
#endif

#ifndef MINDIM_K
#define MINDIM_K    100
#endif

#ifndef MAXDIM_M
#define MAXDIM_M    1000
#endif

#ifndef MAXDIM_N
#define MAXDIM_N    1000
#endif

#ifndef MAXDIM_K
#define MAXDIM_K    1000
#endif

#ifndef INC_M
#define INC_M   100
#endif

#ifndef INC_N
#define INC_N   100
#endif

#ifndef INC_K
#define INC_K   100
#endif

#ifndef MIN_T
#define MIN_T   1
#endif

#ifndef ALPHA
#define ALPHA   1
#endif

#ifndef BETA
#define BETA   1
#endif

double A[MAXDIM_M*MAXDIM_K];
double B[MAXDIM_K*MAXDIM_N];
double C1[MAXDIM_M*MAXDIM_N];
double C2[MAXDIM_M*MAXDIM_N];

/* return real time in seconds since start of the process */
double
wallTime()
{
    static clock_t ticks_per_second = 0;
    if (!ticks_per_second) {
        ticks_per_second = sysconf(_SC_CLK_TCK);
    }
    struct tms timebuf;
    /* times returns the number of real time ticks passed since start */
    return (double) times(&timebuf) / ticks_per_second;
}

double
asumDiff(size_t m, size_t n,
         const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
         const double *B, ptrdiff_t incRowB, ptrdiff_t incColB)
{
    double diff = 0;
    for (size_t i = 0; i < m; ++i) {
        for (size_t j = 0; j < n; ++j) {
            diff += fabs(B[i*incRowB+j*incColB] - A[i*incRowA+j*incColA]);
        }
    }
    return diff;
}

void
initMatrix(size_t m, size_t n, double *A, ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t j = 0; j < n; ++j) {
        for (size_t i = 0; i < m; ++i) {
            A[i*incRowA+j*incColA] = j*n+i+1;
        }
    }
}

void
printMatrix(size_t m, size_t n, const double *A,
            ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t i = 0; i < m; ++i) {
        printf("   ");
        for (size_t j = 0; j < n; ++j) {
            printf("%4.1lf ", A[i*incRowA+j*incColA]);
        }
        printf("\n");
    }
    printf("\n");
}

void
dgemm_row(size_t m, size_t n, size_t k, double alpha,
          const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
          const double *B, ptrdiff_t incRowB, ptrdiff_t incColB,
          double beta,
          double *C, ptrdiff_t incRowC, ptrdiff_t incColC)
{
    if (beta!=0) {
        if (beta!=1) {
            for (size_t i=0; i<m; ++i) {
                for (size_t j=0; j<n; ++j) {
                    C[i*incRowC+j*incColC] *= beta;
                }
            }
        }
    } else {
        for (size_t i=0; i<m; ++i) {
            for (size_t j=0; j<n; ++j) {
                C[i*incRowC+j*incColC] = 0;
            }
        }
    }
    if (alpha==0 || k==0) {
        return;
    }
    for (size_t i=0; i<m; ++i) {
        for (size_t l=0; l<k; ++l) {
            for (size_t j=0; j<n; ++j) {
                C[i*incRowC+j*incColC] +=
                    alpha*A[i*incRowA+l*incColA]*B[l*incRowB+j*incColB];
            }
        }
    }
}

void
dgemm_col(size_t m, size_t n, size_t k, double alpha,
          const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
          const double *B, ptrdiff_t incRowB, ptrdiff_t incColB,
          double beta,
          double *C, ptrdiff_t incRowC, ptrdiff_t incColC)
{
    // TODO: Your code here ...
}


int
main()
{
    printf("   M    N    K      t1      t2   t2/t1       diff\n");
    printf("               col-maj row-maj\n");
    printf("=================================================\n");

    for (size_t m = MINDIM_M, n = MINDIM_N, k = MINDIM_K;
         m <= MAXDIM_M && n <= MAXDIM_N && k <= MAXDIM_K;
	 m += INC_M, n += INC_N, k += INC_K)
    {
        // set storage order for A, B, C1, C2
        ptrdiff_t incRowA = 1, incColA = m;
        ptrdiff_t incRowB = 1, incColB = k;
        ptrdiff_t incRowC = 1, incColC = m;     // used for C1, C2

        initMatrix(m, k, A, incRowA, incColA);
        initMatrix(k, n, B, incRowB, incColB);

        size_t runs = 0;
        double t1 = 0;
        do {
            initMatrix(m, n, C1, incRowC, incColC);

            double t0 = wallTime();
            dgemm_col(m, n, k, ALPHA,
                      A, incRowA, incColA,
                      B, incRowB, incColB,
                      BETA,
                      C1, incRowC, incColC);
            t1 += wallTime() - t0;
            ++runs;
        } while (t1 < MIN_T);
        t1 /= runs;

        runs = 0;
        double t2 = 0;
        do {
            initMatrix(m, n, C2, incRowC, incColC);

            double t0 = wallTime();
            dgemm_row(m, n, k,
                      ALPHA,
                      A, incRowA, incColA,
                      B, incRowB, incColB,
                      BETA,
                      C2, incRowC, incColC);
            t2 += wallTime() - t0;
            ++runs;
        } while (t2 < MIN_T);
        t2 /= runs;

        double diff = asumDiff(m, n,
                               C1, incRowC, incColC,
                               C2, incRowC, incColC);

        printf("%4zu %4zu %4zu %7.2lf %7.2lf %7.2lf %10.2le\n",
	       m, n, k, t1, t2, t2/t1, diff);
    }
}