1
       2
       3
       4
       5
       6
       7
       8
       9
      10
      11
      12
      13
      14
      15
      16
      17
      18
      19
      20
      21
      22
      23
      24
      25
      26
      27
      28
      29
      30
      31
      32
      33
      34
      35
      36
      37
      38
      39
      40
      41
      42
      43
      44
      45
      46
      47
      48
      49
      50
      51
      52
      53
      54
      55
      56
      57
      58
      59
      60
      61
      62
      63
      64
      65
      66
      67
      68
      69
      70
      71
      72
      73
      74
      75
      76
      77
      78
      79
      80
      81
      82
      83
      84
      85
      86
      87
      88
      89
      90
      91
      92
      93
      94
      95
      96
      97
      98
      99
     100
     101
     102
     103
     104
     105
     106
     107
     108
     109
     110
     111
     112
     113
     114
     115
     116
     117
     118
     119
     120
     121
     122
     123
     124
     125
     126
     127
     128
     129
     130
     131
     132
     133
     134
     135
     136
     137
     138
     139
     140
     141
     142
     143
     144
     145
     146
     147
     148
     149
     150
     151
     152
     153
     154
     155
     156
     157
     158
     159
     160
     161
     162
     163
     164
#include <stdio.h>
#include <bench/aux.h>
#include <bench/refblas.h>
#include <bench/errbound.h>
#include <ulmblas/level1.h>
#include <ulmblas/level3.h>

#ifndef ALPHA
#define ALPHA 2
#endif

#ifndef BETA
#define BETA 2
#endif

#ifndef MIN_N
#define MIN_N 100
#endif

#ifndef MAX_N
#define MAX_N 1500
#endif

#ifndef INC_N
#define INC_N 100
#endif

#ifndef MIN_M
#define MIN_M 100
#endif

#ifndef MAX_M
#define MAX_M 1500
#endif

#ifndef INC_M
#define INC_M 100
#endif

#ifndef MIN_K
#define MIN_K 100
#endif

#ifndef MAX_K
#define MAX_K 1500
#endif

#ifndef INC_K
#define INC_K 100
#endif

#ifndef ROWMAJOR_A
#define ROWMAJOR_A 0
#endif

#ifndef ROWMAJOR_B
#define ROWMAJOR_B 0
#endif

#ifndef ROWMAJOR_C
#define ROWMAJOR_C 0
#endif

#if (ROWMAJOR_A==1)
#   define INCROW_A  MAX_K
#   define INCCOL_A  1
#else
#   define INCROW_A  1
#   define INCCOL_A  MAX_M
#endif

#if (ROWMAJOR_B==1)
#   define INCROW_B  MAX_N
#   define INCCOL_B  1
#else
#   define INCROW_B  1
#   define INCCOL_B  MAX_K
#endif

#if (ROWMAJOR_C==1)
#   define INCROW_C  MAX_N
#   define INCCOL_C  1
#else
#   define INCROW_C  1
#   define INCCOL_C  MAX_M
#endif

#define MIN(X,Y)   ((X)<(Y) ? (X) : (Y))
#define MAX(X,Y)   ((X)>(Y) ? (X) : (Y))

double A_[MAX_M*MAX_K*MIN(INCROW_A,INCCOL_A)];
double B_[MAX_K*MAX_N*MIN(INCROW_B,INCCOL_B)];
double C_[MAX_M*MAX_N];

double C_0[MAX_M*MAX_N*MIN(INCROW_A,INCCOL_A)];
double C_1[MAX_M*MAX_N*MIN(INCROW_A,INCCOL_A)];

int
main()
{
    int m, n, k;

    randGeMatrix(MAX_M, MAX_K, A_, INCROW_A, INCCOL_A);
    randGeMatrix(MAX_K, MAX_N, B_, INCROW_B, INCCOL_B);
    randGeMatrix(MAX_M, MAX_N, C_, 1, MAX_M);

    printf("#%9s %9s %9s", "m", "n", "k");
    printf(" %12s %12s %12s", "t", "MFLOPS", "err");
    printf(" %12s %12s %12s", "t", "MFLOPS", "err");
    printf("\n");

    for (m=MIN_M, n=MIN_N, k=MIN_K;
         n<=MAX_N && m<=MAX_M && k<=MAX_K;
         m+=INC_M, n+=INC_N, k+=INC_K)
    {
        double t, dt, err;
        int    runs  = 1;
        double ops   = 2.0*m/1000*n/1000*k;

        printf(" %9d %9d %9d", m, n, k);

        // compute reference solution
        dgecopy(m, n, C_, 1, MAX_M, C_0, INCROW_C, INCCOL_C);
        dgemm_ref(m, n, k,
                  ALPHA,
                  A_, INCROW_A, INCCOL_A,
                  B_, INCROW_B, INCCOL_B,
                  BETA,
                  C_0, INCROW_C, INCCOL_C);

        // bench dgemm

        t    = 0;
        runs = 0;
        do {
            dgecopy(m, n, C_, 1, MAX_M, C_1, INCROW_C, INCCOL_C);
            dt = walltime();
            dgemm(m, n, k,
                  ALPHA,
                  A_, INCROW_A, INCCOL_A,
                  B_, INCROW_B, INCCOL_B,
                  BETA,
                  C_1, INCROW_C, INCCOL_C);
            dt = walltime() - dt;
            t += dt;
            ++runs;
        } while (t<0.3);
        t /= runs;

        err = err_dgemm(m, n, k,
                        ALPHA,
                        A_, INCROW_A, INCCOL_A,
                        B_, INCROW_B, INCCOL_B,
                        BETA,
                        C_0, INCROW_C, INCCOL_C,
                        C_1, INCROW_C, INCCOL_C);

        printf(" %12.2e %12.2lf %12.2e", t, ops/t, err);

        printf("\n");
    }

    return 0;
}