#include <stdio.h>
#include <bench/aux.h>
#include <bench/refblas.h>
#include <bench/errbound.h>
#include <ulmblas/level1.h>
#include <ulmblas/level2.h>
#include <ulmblas/level3.h>
#ifndef MIN_M
#define MIN_M 100
#endif
#ifndef MAX_M
#define MAX_M 1500
#endif
#ifndef INC_M
#define INC_M 100
#endif
#ifndef MIN_N
#define MIN_N 100
#endif
#ifndef MAX_N
#define MAX_N 1500
#endif
#ifndef INC_N
#define INC_N 100
#endif
#ifndef ROWMAJOR
#define ROWMAJOR 0
#endif
#ifndef ALPHA
#define ALPHA 2.5
#endif
#ifndef UNITDIAG
#define UNITDIAG 0
#endif
#if (ROWMAJOR==1)
# define INCROW_A MAX_M
# define INCCOL_A 1
#else
# define INCROW_A 1
# define INCCOL_A MAX_M
#endif
#if (ROWMAJOR==1)
# define INCROW_B MAX_N
# define INCCOL_B 1
#else
# define INCROW_B 1
# define INCCOL_B MAX_M
#endif
double A[MAX_M*MAX_M];
double B[MAX_M*MAX_M];
double X0[MAX_M*MAX_N];
double X1[MAX_M*MAX_N];
int
main()
{
int m, n;
randGeMatrix(MAX_M, MAX_M, A, INCROW_A, INCCOL_A);
makeTrlDiagDom(MAX_M, UNITDIAG, A, INCROW_A, INCCOL_A);
randGeMatrix(MAX_M, MAX_N, B, INCROW_B, INCCOL_B);
printf("#UNITDIAG=%3d\n", UNITDIAG);
printf("# %51s %25s\n", "dtrlsv_ref", "dtrlsv_row");
printf("#%9s %9s %9s", "n", "INCROW_A", "INCCOL_A");
printf(" %9s %9s", "INCROW_B", "INCCOL_B");
printf(" %12s %12s", "t", "MFLOPS");
printf(" %12s %12s %12s", "t", "MFLOPS", "err");
printf("\n");
for (m=MIN_M, n=MIN_N; m<=MAX_M && n<=MAX_N; m+=INC_M, n+=INC_N) {
int runs = 1;
double ops = (double)n*m*(m+1)/2 + (double)n*m*(m-1)/2;
double t0, dt, err;
printf(" %9d %9d %9d", n, INCROW_A, INCCOL_A);
printf(" %9d %9d", INCROW_B, INCCOL_B);
t0 = 0;
runs = 0;
do {
dgecopy(m, n, B, INCROW_B, INCCOL_B, X0, INCROW_B, INCCOL_B);
dt = walltime();
dtrlsm_ref(m, n, (double)ALPHA, UNITDIAG,
A, INCROW_A, INCCOL_A,
X0, INCROW_B, INCCOL_B);
dt = walltime() - dt;
t0 += dt;
++runs;
} while (t0<0.3);
t0 /= runs;
printf(" %12.2e %12.2lf", t0, ops/(1000*1000*t0));
t0 = 0;
runs = 0;
do {
dgecopy(m, n, B, INCROW_B, INCCOL_B, X1, INCROW_B, INCCOL_B);
dt = walltime();
dtrlsm(m, n, (double)ALPHA, UNITDIAG,
A, INCROW_A, INCCOL_A,
X1, INCROW_B, INCCOL_B);
dt = walltime() - dt;
t0 += dt;
++runs;
} while (t0<0.3);
t0 /= runs;
err = err_dtrsm(m, n, ALPHA, UNITDIAG, 1,
A, INCROW_A, INCCOL_A,
X0, INCROW_B, INCCOL_B,
X1, INCROW_B, INCCOL_B);
printf(" %12.2e %12.2lf %12.2e", t0, ops/(1000*1000*t0), err);
printf("\n");
}
return 0;
}