1
       2
       3
       4
       5
       6
       7
       8
       9
      10
      11
      12
      13
      14
      15
      16
      17
      18
      19
      20
      21
      22
      23
      24
      25
      26
      27
      28
      29
      30
      31
      32
      33
      34
      35
      36
      37
      38
      39
      40
      41
      42
      43
      44
      45
      46
      47
      48
      49
      50
      51
      52
      53
      54
      55
      56
      57
      58
      59
      60
      61
      62
      63
      64
      65
      66
      67
      68
      69
      70
      71
      72
      73
      74
      75
      76
      77
      78
      79
      80
      81
      82
      83
      84
      85
      86
      87
      88
      89
      90
      91
      92
      93
      94
      95
      96
      97
      98
      99
     100
     101
     102
     103
     104
     105
     106
     107
     108
     109
     110
     111
     112
     113
     114
     115
     116
     117
     118
     119
     120
     121
     122
     123
     124
     125
     126
     127
     128
     129
     130
     131
     132
     133
     134
     135
     136
     137
     138
#include <stdio.h>
#include <bench/aux.h>
#include <bench/refblas.h>
#include <bench/errbound.h>
#include <ulmblas/level1.h>
#include <ulmblas/level2.h>
#include <ulmblas/level3.h>

//------------------------------------------------------------------------------

#ifndef MIN_M
#define MIN_M 100
#endif

#ifndef MAX_M
#define MAX_M 1500
#endif

#ifndef INC_M
#define INC_M 100
#endif

#ifndef MIN_N
#define MIN_N 100
#endif

#ifndef MAX_N
#define MAX_N 1500
#endif

#ifndef INC_N
#define INC_N 100
#endif

#ifndef ROWMAJOR
#define ROWMAJOR 0
#endif

#ifndef ALPHA
#define ALPHA 2.5
#endif

#ifndef UNITDIAG
#define UNITDIAG 0
#endif

#if (ROWMAJOR==1)
#   define INCROW_A  MAX_M
#   define INCCOL_A  1
#else
#   define INCROW_A  1
#   define INCCOL_A  MAX_M
#endif

#if (ROWMAJOR==1)
#   define INCROW_B  MAX_N
#   define INCCOL_B  1
#else
#   define INCROW_B  1
#   define INCCOL_B  MAX_M
#endif

double A[MAX_M*MAX_M];
double B[MAX_M*MAX_M];
double X0[MAX_M*MAX_N];
double X1[MAX_M*MAX_N];

int
main()
{
    int m, n;

    randGeMatrix(MAX_M, MAX_M, A, INCROW_A, INCCOL_A);
    makeTrlDiagDom(MAX_M, UNITDIAG, A, INCROW_A, INCCOL_A);
    randGeMatrix(MAX_M, MAX_N, B, INCROW_B, INCCOL_B);

    printf("#UNITDIAG=%3d\n", UNITDIAG);
    printf("# %51s %25s\n", "dtrlsv_ref", "dtrlsv_row");
    printf("#%9s %9s %9s", "n", "INCROW_A", "INCCOL_A");
    printf(" %9s %9s", "INCROW_B", "INCCOL_B");
    printf(" %12s %12s", "t", "MFLOPS");
    printf(" %12s %12s %12s", "t", "MFLOPS", "err");
    printf("\n");

    for (m=MIN_M, n=MIN_N; m<=MAX_M && n<=MAX_N; m+=INC_M, n+=INC_N) {
        int     runs = 1;
        double  ops = (double)n*m*(m+1)/2 + (double)n*m*(m-1)/2;
        double  t0, dt, err;

        printf(" %9d %9d %9d", n, INCROW_A, INCCOL_A);
        printf(" %9d %9d", INCROW_B, INCCOL_B);

        t0   = 0;
        runs = 0;
        do {
            dgecopy(m, n, B, INCROW_B, INCCOL_B, X0, INCROW_B, INCCOL_B);
            dt = walltime();

            dtrlsm_ref(m, n, (double)ALPHA, UNITDIAG,
                       A, INCROW_A, INCCOL_A,
                       X0, INCROW_B, INCCOL_B);

            dt = walltime() - dt;
            t0 += dt;
            ++runs;
        } while (t0<0.3);
        t0 /= runs;

        printf(" %12.2e %12.2lf", t0, ops/(1000*1000*t0));

        t0   = 0;
        runs = 0;
        do {
            dgecopy(m, n, B, INCROW_B, INCCOL_B, X1, INCROW_B, INCCOL_B);
            dt = walltime();

            dtrlsm(m, n, (double)ALPHA, UNITDIAG,
                   A, INCROW_A, INCCOL_A,
                   X1, INCROW_B, INCCOL_B);

            dt = walltime() - dt;
            t0 += dt;
            ++runs;
        } while (t0<0.3);
        t0 /= runs;

        err = err_dtrsm(m, n, ALPHA, UNITDIAG, 1,
                        A, INCROW_A, INCCOL_A,
                        X0, INCROW_B, INCCOL_B,
                        X1, INCROW_B, INCCOL_B);

        printf(" %12.2e %12.2lf %12.2e", t0, ops/(1000*1000*t0), err);
        printf("\n");

    }

    return 0;
}