Content

Aufgabe: GEMM-Micro-Kernel

Die GEMM-Operation \(C \leftarrow \beta\,C + \alpha\,A B\) soll für folgenden Spezialfall programmiert werden:

Algorithmus

Bei der Implementierung soll seine lokale \(M_r \times N_r\) Matrix \(\overline{AB}\) benutzt werden die zunächst mit Null initialisiert und dann mit dem Produkt \(A \cdot B\) überschrieben wird:

  • \(\overline{AB} \leftarrow \mathbf{0}\)

  • \(\overline{AB} \leftarrow A \cdot B\)

  • Falls \(\beta = 0\)

    • \(C \leftarrow \alpha\,\overline{AB}\)

  • Sonst

    • \(C \leftarrow \beta\,C + \alpha\,\overline{AB}\)

Weitere Anforderungen:

Vorlage

#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>

//-- setup and print matrices --------------------------------------------------

void
initGeMatrix(int m, int n, double *A, int incRowA, int incColA)
{
    int i, j;

    for (i=0; i<m; ++i) {
        for (j=0; j<n; ++j) {
            A[i*incRowA+j*incColA] = i*n + j + 1;
        }
    }
}

void
printGeMatrix(int m, int n, const double *A, int incRowA, int incColA)
{
    int i, j;

    for (i=0; i<m; ++i) {
        for (j=0; j<n; ++j) {
            printf("%10.4lf ", A[i*incRowA+j*incColA]);
        }
        printf("\n");
    }
    printf("\n\n");
}

//------------------------------------------------------------------------------

#ifndef DGEMM_MC
#define DGEMM_MC 8
#endif

#ifndef DGEMM_NC
#define DGEMM_NC 9
#endif

#ifndef DGEMM_KC
#define DGEMM_KC 10
#endif

#ifndef DGEMM_MR
#define DGEMM_MR 4
#endif

#ifndef DGEMM_NR
#define DGEMM_NR 3
#endif



//------------------------------------------------------------------------------

void
dpack_A(int m, int k, const double *A, int incRowA, int incColA, double *p)
{
    int i, i0, j, l, nu;
    int mp = (m+DGEMM_MR-1) / DGEMM_MR;

    for (j=0; j<k; ++j) {
        for (l=0; l<mp; ++l) {
            for (i0=0; i0<DGEMM_MR; ++i0) {
                i     = l*DGEMM_MR + i0;
                nu    = l*DGEMM_MR*k + j*DGEMM_MR + i0;
                p[nu] = (i<m) ? A[i*incRowA+j*incColA]
                              : 0;
            }
        }
    }
}

void
dpack_B(int k, int n, const double *B, int incRowB, int incColB, double *p)
{
    int i, j, j0, l, nu;
    int np = (n+DGEMM_NR-1) / DGEMM_NR;

    for (l=0; l<np; ++l) {
        for (j0=0; j0<DGEMM_NR; ++j0) {
            j = l*DGEMM_NR + j0;
            for (i=0; i<k; ++i) {
                nu    = l*DGEMM_NR*k + i*DGEMM_NR + j0;
                p[nu] = (j<n) ? B[i*incRowB+j*incColB]
                              : 0;
            }
        }
    }
}

void
dgemm_micro(int k,
            double alpha,
            const double *A,
            const double *B,
            double beta,
            double *C, int incRowC, int incColC)
{
    int    i, j, l;
    double AB[DGEMM_MR*DGEMM_NR];

    // ... your code here ...
}

//------------------------------------------------------------------------------
void
dgecopy(int m, int n,
        const double *X, int incRowX, int incColX,
        double *Y, int incRowY, int incColY)
{
    int i, j;

    for (i=0; i<m; ++i) {
        for (j=0; j<n; ++j) {
            Y[i*incRowY+j*incColY] = X[i*incRowX+j*incColX];
        }
    }
}

void
dgemm_ilj(int m, int n, int k,
          double alpha,
          const double *A, int incRowA, int incColA,
          const double *B, int incRowB, int incColB,
          double beta,
          double *C, int incRowC, int incColC)
{
    int i, j, l;

    for (i=0; i<m; ++i) {
        for (l=0; l<k; ++l) {
            for (j=0; j<n; ++j) {
                if (l==0) {
                    C[i*incRowC+j*incColC] *= beta;
                }
                C[i*incRowC+j*incColC] += alpha*A[i*incRowA+l*incColA]
                                               *B[l*incRowB+j*incColB];
            }
        }
    }
}
//------------------------------------------------------------------------------

#ifndef DIM_K
#define DIM_K 12
#endif

#ifndef ALPHA
#define ALPHA 2
#endif

#ifndef BETA
#define BETA 2
#endif

#ifndef ROWMAJOR
#define INCROW_C 1
#define INCCOL_C DGEMM_MR
#else
#define INCROW_C DGEMM_NR
#define INCCOL_C 1
#endif


double A_panel[DIM_K*DGEMM_MR];
double B_panel[DIM_K*DGEMM_NR];
double C[DGEMM_MR*DGEMM_NR];
double C_ref[DGEMM_MR*DGEMM_NR];

int
main()
{
    initGeMatrix(DGEMM_MR, DIM_K, A_panel, 1, DGEMM_MR);
    initGeMatrix(DIM_K, DGEMM_NR, B_panel, DGEMM_NR, 1);
    initGeMatrix(DGEMM_MR, DGEMM_NR, C, INCROW_C, INCCOL_C);
    dgecopy(DGEMM_MR, DGEMM_NR,
            C, INCROW_C, INCCOL_C,
            C_ref, INCROW_C, INCCOL_C);

    printf("A_panel =\n");
    printGeMatrix(DGEMM_MR, DIM_K, A_panel, 1, DGEMM_MR);
    printf("B_panel =\n");
    printGeMatrix(DIM_K, DGEMM_NR, B_panel, DGEMM_NR, 1);
    printf("C =\n");
    printGeMatrix(DGEMM_MR, DGEMM_NR, C, INCROW_C, INCCOL_C);

    /* ... your code: call dgemm_micro! */

    printf("%lf*C + %lf*A*B =\n", (double)ALPHA, (double)BETA);
    printGeMatrix(DGEMM_MR, DGEMM_NR, C, INCROW_C, INCCOL_C);

    printf("C_ref =\n");
    dgemm_ilj(DGEMM_MR, DGEMM_NR, DIM_K,
              (double)ALPHA,
              A_panel, 1, DGEMM_MR,
              B_panel, DGEMM_NR, 1,
              (double)BETA,
              C_ref, INCROW_C, INCCOL_C);
    printGeMatrix(DGEMM_MR, DGEMM_NR, C_ref, INCROW_C, INCCOL_C);

    return 0;
}