Namenskonflikte und Co

Bei unseren C Implementierungen haben wir Namenskonflikte durch Namenszusätze vermieden. Zum Beispiel nannten wir verschiedene GEMM Implementierungen dgemm_var1, degemm_var2, usw. oder etwas mehr aussagekräftig dgemm_colmajor, dgemm_rowmajor, usw.

Konflikte bei globalen Variablen haben wir durch Sorgfalt vermieden:

Beim Benchmark wurden die globalen Arrays A, B, C nur in main benutzt.
Die globalen Arrays A_, B_, C_ nur in einer gepufferten GEMM Variante.

Dass dies bei größeren Projekten nicht mehr so einfach geht, dürfte klar sein. Hier sind im Allgemeinen auch Namenszusätze bei globalen Variablen notwendig.

Auch bei den Funktionsnamen wurden eventuelle Namenskonflikte durch das Prefix d gelöst. Dies legt fast, dass die Implementierung Matrizen multipliziert deren Elemente vom Typ double sind.

C-Vorlage mit Namenszusätzen

Wie unschön eine C-Implementierung wird, wenn so Namenskonflikte vermieden werden, soll folgendes Beispiel enthalten. Es enthält sogar Zeilen mit mehr als 80 Zeichen! Es basiert auf der Lösung von Session 6:

session8/gemm.c

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>

#ifndef COLMAJOR
#define COLMAJOR 1
#endif

#ifndef MAXDIM_M
#define MAXDIM_M    4000
#endif

#ifndef MAXDIM_N
#define MAXDIM_N    4000
#endif

#ifndef MAXDIM_K
#define MAXDIM_K    4000
#endif

#ifndef MIN_M
#define MIN_M   100
#endif

#ifndef MIN_N
#define MIN_N   100
#endif

#ifndef MIN_K
#define MIN_K   100
#endif

#ifndef MAX_M
#define MAX_M   7000
#endif

#ifndef MAX_N
#define MAX_N   7000
#endif

#ifndef MAX_K
#define MAX_K   7000
#endif

#ifndef INC_M
#define INC_M   100
#endif

#ifndef INC_N
#define INC_N   100
#endif

#ifndef INC_K
#define INC_K   100
#endif

#ifndef ALPHA
#define ALPHA   1.5
#endif

#ifndef BETA
#define BETA    1.5
#endif


//==============================================================================
// bench
//==============================================================================


//------------------------------------------------------------------------------
// Auxiliary data for benchmarking
//------------------------------------------------------------------------------


double A_bench[MAXDIM_M*MAXDIM_K];
double B_bench[MAXDIM_K*MAXDIM_N];

double C1_bench[MAXDIM_M*MAXDIM_N];
double C2_bench[MAXDIM_M*MAXDIM_N];

//------------------------------------------------------------------------------
// Auxiliary functions for benchmarking
//------------------------------------------------------------------------------

double
walltime_bench()
{
   struct tms    ts;
   static double ClockTick=0.0;

   if (ClockTick==0.0) {
        ClockTick = 1.0 / ((double) sysconf(_SC_CLK_TCK));
   }
   return ((double) times(&ts)) * ClockTick;
}



void
initMatrix_bench(long m, long n, double *A, long incRowA, long incColA)
{
    long i, j;
    for (j=0; j<n; ++j) {
        for (i=0; i<m; ++i) {
            A[i*incRowA+j*incColA] = ((double)rand() - RAND_MAX/2)*200/RAND_MAX;
        }
    }
}

double
dasumDiffMatrix_bench(long m, long n,
                      const double *A, long incRowA, long incColA,
                      double *B, long incRowB, long incColB)
{
    long i, j;

    double asum = 0;

    for (j=0; j<n; ++j) {
        for (i=0; i<m; ++i) {
            asum += fabs(B[i*incRowB+j*incColB] - A[i*incRowA+j*incColA]);
        }
    }
    return asum;
}


//==============================================================================
// ulmBLAS
//==============================================================================

//------------------------------------------------------------------------------
// A <- B
//------------------------------------------------------------------------------

void
dgecopy_ulmBLAS(long m, long n,
                const double *A, long incRowA, long incColA,
                double *B, long incRowB, long incColB)
{
    long i, j;
    for (j=0; j<n; ++j) {
        for (i=0; i<m; ++i) {
            B[i*incRowB+j*incColB] = A[i*incRowA+j*incColA];
        }
    }
}

//------------------------------------------------------------------------------
// Y <- Y + alpha*Y
//------------------------------------------------------------------------------

void
dgeaxpy_ulmBLAS(long m, long n,
                double alpha,
                const double *X, long incRowX, long incColX,
                double       *Y, long incRowY, long incColY)
{
    long i, j;

    for (i=0; i<m; ++i) {
        for (j=0; j<n; ++j) {
            Y[i*incRowY+j*incColY] += alpha*X[i*incRowX+j*incColX];
        }
    }
}

//------------------------------------------------------------------------------
// A <- alpha * A
//------------------------------------------------------------------------------

void
dgescal_ulmBLAS(long m, long n,
                double alpha,
                double *X, long incRowX, long incColX)
{
    long i, j;

    if (alpha!=1.0) {
        for (i=0; i<m; ++i) {
            for (j=0; j<n; ++j) {
                X[i*incRowX+j*incColX] *= alpha;
            }
        }
    }
}

//==============================================================================
// refColMajor
//==============================================================================

void
dgemm_refColMajor(long m, long n, long k,
                  double alpha,
                  const double *A, long incRowA, long incColA,
                  const double *B, long incRowB, long incColB,
                  double beta,
                  double *C, long incRowC, long incColC)
{
    long i, j, l;

    dgescal_ulmBLAS(m, n, beta, C, incRowC, incColC);
    for (j=0; j<n; ++j) {
        for (l=0; l<k; ++l) {
            for (i=0; i<m; ++i) {
                C[i*incRowC+j*incColC] += alpha*A[i*incRowA+l*incColA]
                                               *B[l*incRowB+j*incColB];
            }
        }
    }
}

//==============================================================================
// simpleBuffer
//==============================================================================

#ifndef SIMPLE_PUFFER_MC
#define SIMPLE_PUFFER_MC 256
#endif

#ifndef SIMPLE_PUFFER_KC
#define SIMPLE_PUFFER_KC 256
#endif

#ifndef SIMPLE_PUFFER_NC
#define SIMPLE_PUFFER_NC 1024
#endif

double A_simpleBuffer[SIMPLE_PUFFER_MC*SIMPLE_PUFFER_KC];
double B_simpleBuffer[SIMPLE_PUFFER_KC*SIMPLE_PUFFER_NC];
double C_simpleBuffer[SIMPLE_PUFFER_MC*SIMPLE_PUFFER_NC];

void
dgemm_simpleBuffer(long m, long n, long k,
                   double alpha,
                   const double *A, long incRowA, long incColA,
                   const double *B, long incRowB, long incColB,
                   double beta,
                   double *C, long incRowC, long incColC)
{
    long i, j, l;

    long MC = SIMPLE_PUFFER_MC;
    long NC = SIMPLE_PUFFER_NC;
    long KC = SIMPLE_PUFFER_KC;

    long mb = m / MC;
    long nb = n / NC;
    long kb = k / KC;

    long mr = m % MC;
    long nr = n % NC;
    long kr = k % KC;

    dgescal_ulmBLAS(m, n, beta, C, incRowC, incColC);

    for (j=0; j<=nb; ++j) {
        long N = (j<nb || nr==0) ? NC : nr;

        for (l=0; l<=kb; ++l) {
            long K = (l<kb || kr==0) ? KC : kr;

            dgecopy_ulmBLAS(K, N,
                            &B[l*KC*incRowB+j*NC*incColB], incRowB, incColB,
                            B_simpleBuffer, 1, KC);

            for (i=0; i<=mb; ++i) {
                long M = (i<mb || mr==0) ? MC : mr;

                dgecopy_ulmBLAS(M, K,
                                &A[i*MC*incRowA+l*KC*incColA], incRowA, incColA,
                                A_simpleBuffer, 1, MC);

                dgemm_refColMajor(M, N, K,
                                  1.0,
                                  A_simpleBuffer, 1, MC,
                                  B_simpleBuffer, 1, KC,
                                  0.0,
                                  C_simpleBuffer, 1, MC);

                dgeaxpy_ulmBLAS(M, N,
                                alpha,
                                C_simpleBuffer, 1, MC,
                                &C[i*MC*incRowC+j*NC*incColC], incRowC, incColC);
            }
        }
    }
}

int
main()
{
    initMatrix_bench(MAXDIM_M, MAXDIM_K, A_bench, 1, MAXDIM_M);
    initMatrix_bench(MAXDIM_K, MAXDIM_N, B_bench, 1, MAXDIM_K);
    initMatrix_bench(MAXDIM_M, MAXDIM_N, C1_bench, 1, MAXDIM_M);
    dgecopy_ulmBLAS(MAXDIM_M, MAXDIM_N,
                    C1_bench, 1, MAXDIM_M,
                    C2_bench, 1, MAXDIM_M);

    long m, n, k;

    // Header-Zeile f�r die Ausgabe
    printf("%5s %5s %5s ", "m", "n", "k");
    printf("%5s %5s ", "IRA", "ICA");
    printf("%5s %5s ", "IRB", "ICB");
    printf("%5s %5s ", "IRC", "ICC");
    printf("%20s %9s", "refColMajor: t", "MFLOPS");
    printf("%20s %9s %9s", "refSimpleBuffer: t", "MFLOPS", "diff");
    printf("\n");

    for (m = MIN_M, n = MIN_N, k = MIN_K;
         m <=MAX_M && n <= MAX_N && k <= MAX_K;
         m += INC_M, n += INC_N, k += INC_K)
    {
        double t0, t1, diff;

        long incRowA = (COLMAJOR) ? 1 : k;
        long incColA = (COLMAJOR) ? m : 1;

        long incRowB = (COLMAJOR) ? 1 : n;
        long incColB = (COLMAJOR) ? k : 1;

        long incRowC = (COLMAJOR) ? 1 : n;
        long incColC = (COLMAJOR) ? m : 1;

        printf("%5ld %5ld %5ld ", m, n, k);
        printf("%5ld %5ld ", incRowA, incColA);
        printf("%5ld %5ld ", incRowB, incColB);
        printf("%5ld %5ld ", incRowC, incColC);

        t0 = walltime_bench();
        dgemm_refColMajor(m, n, k, ALPHA,
                          A_bench, incRowA, incColA,
                          B_bench, incRowB, incColB,
                          BETA,
                          C1_bench, incRowC, incColC);
        t1 = walltime_bench() - t0;
        printf("%20.4lf %9.2lf", t1, 2.*m/1000*n/1000*k/t1);

        t0 = walltime_bench();
        dgemm_simpleBuffer(m, n, k, ALPHA,
                           A_bench, incRowA, incColA,
                           B_bench, incRowB, incColB,
                           BETA,
                           C2_bench, incRowC, incColC);
        t1 = walltime_bench() - t0;
        diff = dasumDiffMatrix_bench(m, n,
                                     C1_bench, incRowC, incColC,
                                     C2_bench, incRowC, incColC);
        printf("%20.4lf %9.2lf %9.1e", t1, 2.*m/1000*n/1000*k/t1, diff);
        printf("\n");
    }
    return 0;
}

Aufgabe

Nutzt C++ als ein verbessertes C, indem ihr:

die Header in den Include-Direktiven anpasst,
Namensräume bench, refColMajor und simpleBuffer statt Namenszusätze verwendet,
bei Funktionsnamen den Buchstaben d streicht, wenn dieser genutzt wird, um die Funktion für double zu kennzeichnen, und indem ihr
Schleifenvariablen in der Schleife bei der Initialisierung definiert.

Natürlich dürfen Typen für Dimensionen auch auf std::size_t geändert werden.