1
       2
       3
       4
       5
       6
       7
       8
       9
      10
      11
      12
      13
      14
      15
      16
      17
      18
      19
      20
      21
      22
      23
      24
      25
      26
      27
      28
      29
      30
      31
      32
      33
      34
      35
      36
      37
      38
      39
      40
      41
      42
      43
      44
      45
      46
      47
      48
      49
      50
      51
      52
      53
      54
      55
      56
      57
      58
      59
      60
      61
      62
      63
      64
      65
      66
      67
      68
      69
      70
      71
      72
      73
      74
      75
      76
      77
      78
      79
      80
      81
      82
      83
      84
      85
      86
      87
      88
      89
      90
      91
      92
      93
      94
      95
      96
      97
      98
      99
     100
     101
     102
     103
     104
     105
     106
     107
     108
     109
     110
#include <math.h>           // for nan()
#include <stddef.h>         // for size_t, ptrdiff_t
#include <stdio.h>          // for printf
#include <stdlib.h>         // for malloc(), free()
#include <sys/times.h>      // needed for walltime()
#include <unistd.h>         // needed for walltime()

//-- Function for benchmarking and testing -------------------------------------

double
walltime()
{
   struct tms    ts;
   static double ClockTick=0.0;

   if (ClockTick==0.0) {
        ClockTick = 1.0 / ((double) sysconf(_SC_CLK_TCK));
   }
   return ((double) times(&ts)) * ClockTick;
}

//------------------------------------------------------------------------------

void
initGeMatrix(size_t m, size_t n,
             double *A,
             ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t i=0; i<m; ++i) {
        for (size_t j=0; j<n; ++j) {
            A[i*incRowA + j*incColA] = i*n + j +1;
        }
    }
}

void
dgeaxpy(size_t m, size_t n,
        double alpha,
        const double *A,
        ptrdiff_t incRowA, ptrdiff_t incColA,
        double *B,
        ptrdiff_t incRowB, ptrdiff_t incColB)
{
    for (size_t i=0; i<m; ++i) {
        for (size_t j=0; j<n; ++j) {
            B[i*incRowB + j*incColB] += alpha*A[i*incRowA + j*incColA];
        }
    }
}

#define MAX_M 2*16*16*64
#define MAX_N 2*16*16*64

int
main()
{
    double *A = malloc(MAX_M*MAX_N*sizeof(double));
    double *B = malloc(MAX_M*MAX_N*sizeof(double));

    printf("#%5s %5s %10s %10s\n", "m", "n", "t (colm)", "t (rowm)");

    for (size_t m=128, n=128; m<=MAX_M && n<=MAX_N; m+=16, n+=16) {

        printf(" %5ld %5ld ", (long)m, (long)n);


        // bench initialization for col major matrix
        {

            ptrdiff_t incRowA = MAX_N;
            ptrdiff_t incColA = 1;

            ptrdiff_t incRowB = MAX_N;
            ptrdiff_t incColB = 1;

            double    alpha = 3;

            initGeMatrix(m, n, A, incRowA, incColA);
            double t  = walltime();
            dgeaxpy(m, n, alpha, A, incRowA, incColA, B, incRowB, incColB);
            t = walltime() - t;

            printf("%10.2lf ", t);
        }

        // bench initialization for col major matrix
        {

            ptrdiff_t incRowA = 1;
            ptrdiff_t incColA = MAX_M;

            ptrdiff_t incRowB = 1;
            ptrdiff_t incColB = MAX_M;

            double    alpha = 3;

            initGeMatrix(m, n, A, incRowA, incColA);
            double t  = walltime();
            dgeaxpy(m, n, alpha, A, incRowA, incColA, B, incRowB, incColB);
            t = walltime() - t;

            printf("%10.2lf ", t);
        }

        printf("\n");

    }
    free(B);
    free(A);
}