#include <ulmaux.h>
#include <ulmblas.h>
#include <assert.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include <math.h>
#include <float.h>
#include <string.h>
#ifndef DTRSV
#define DTRSV dtrsv
#endif
#ifndef SEED_RAND
#define SEED_RAND 0
#endif
#ifndef TOL_ERR
#define TOL_ERR 1
#endif
void
dcopy_ref(size_t n,
const double *x, ptrdiff_t incX,
double *y, ptrdiff_t incY)
{
for (size_t i=0; i<n; ++i) {
y[i*incY] = x[i*incX];
}
}
void
dgemv_ref(size_t m, size_t n,
double alpha,
const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
const double *x, ptrdiff_t incX,
double beta,
double *y, ptrdiff_t incY)
{
if (m==0 || n==0) {
return;
}
if (beta!=1) {
if (beta!=0) {
for (size_t i=0; i<m; ++i) {
y[i*incY] *= beta;
}
} else {
for (size_t i=0; i<m; ++i) {
y[i*incY] = 0;
}
}
}
if (alpha!=0) {
for (size_t j=0; j<n; ++j) {
for (size_t i=0; i<m; ++i) {
y[i*incY] += alpha*A[i*incRowA+j*incColA]*x[j*incX];
}
}
}
}
void
dtrmv_ref(size_t n, bool lower, bool unit,
const double *A_, ptrdiff_t incRowA, ptrdiff_t incColA,
double *y, ptrdiff_t incY)
{
double *A = malloc(n*n*sizeof(double));
double *x = malloc(n*sizeof(double));
if (!A || !x) {
abort();
}
ptrdiff_t ldA = n;
for (size_t i=0; i<n; ++i) {
for (size_t j=0; j<n; ++j) {
A[i+j*ldA] = 0;
if (lower && j<i) {
A[i+j*ldA] = A_[i*incRowA+j*incColA];
}
if (!lower && j>i) {
A[i+j*ldA] = A_[i*incRowA+j*incColA];
}
if (i==j) {
A[i+j*ldA] = unit ? 1: A_[i*incRowA+j*incColA];
}
}
}
for (size_t i=0; i<n; ++i) {
x[i] = y[i*incY];
}
dgemv_ref(n, n, 1, A, 1, ldA, x, 1, 0, y, incY);
free(A);
free(x);
}
double
dtrsv_err(size_t n, bool lower, bool unit,
const double *A_, ptrdiff_t incRowA, ptrdiff_t incColA,
const double *b, ptrdiff_t incB,
const double *x0, ptrdiff_t incX0,
double *x, ptrdiff_t incX)
{
if (n==0) {
return 0;
}
daxpy(n, -1, x0, incX0, x, incX);
double nrmD = dgenrm_inf(n, 1, x, incX, 0);
if (isnan(nrmD)) {
return nrmD;
}
if (nrmD==0) {
return 0;
}
double *A = malloc(n*n*sizeof(double));
if (!A) {
abort();
}
ptrdiff_t ldA = n;
for (size_t i=0; i<n; ++i) {
for (size_t j=0; j<n; ++j) {
A[i+j*ldA] = 0;
if (lower && j<i) {
A[i+j*ldA] = A_[i*incRowA+j*incColA];
}
if (!lower && j>i) {
A[i+j*ldA] = A_[i*incRowA+j*incColA];
}
if (i==j) {
A[i+j*ldA] = unit ? 1: A_[i*incRowA+j*incColA];
}
}
}
double nrmA = dgenrm_inf(n, n, A, 1, ldA);
double nrmB = dgenrm_inf(n, 1, b, incB, 0);
if (nrmA<1) {
nrmA = 1;
}
if (nrmB<1) {
nrmB = 1;
}
double err = nrmD / (nrmA*nrmB*n*DBL_EPSILON);
free(A);
return err;
}
void
makeDDiagDom(size_t n, bool lower, bool unit,
double *A, ptrdiff_t incRowA, ptrdiff_t incColA)
{
for (size_t i=0; i<n; ++i) {
double scal = 0;
if (lower) {
for (size_t j=0; j<i; ++j) {
scal += fabs(A[i*incRowA+j*incColA]);
}
} else {
for (size_t j=i+1; j<n; ++j) {
scal += fabs(A[i*incRowA+j*incColA]);
}
}
scal = unit ? 1/scal : fabs(A[i*incRowA+i*incColA])/scal;
if (lower) {
for (size_t j=0; j<i; ++j) {
A[i*incRowA+j*incColA] *= scal;
}
} else {
for (size_t j=i+1; j<n; ++j) {
A[i*incRowA+j*incColA] *= scal;
}
}
}
}
bool
get_next_param(bool reset, size_t *n, bool *lower, bool *unit,
ptrdiff_t *incRowA, ptrdiff_t *incColA, ptrdiff_t *incX)
{
static size_t dim[] = {5, 0, 1, 32, 31};
static ptrdiff_t inc[] = {1, 2, -1, -2};
static bool colMajor;
static bool lower_;
static bool unit_;
static size_t idx_n;
static size_t idx_incRowA;
static size_t idx_incColA;
static size_t idx_incX;
if (reset) {
colMajor = false;
lower_ = true;
unit_ = false;
idx_n = 0;
idx_incRowA = 0;
idx_incColA = 0;
idx_incX = 0;
}
*n = dim[idx_n];
*incX = inc[idx_incX];
*lower = lower_;
*unit = unit_;
if (colMajor) {
*incRowA = inc[idx_incRowA];
*incColA = dim[idx_n]*ptrdiff_abs(inc[idx_incRowA]*inc[idx_incColA]);
} else {
*incRowA = dim[idx_n]*ptrdiff_abs(inc[idx_incRowA]*inc[idx_incColA]);
*incColA = inc[idx_incColA];
}
if (!colMajor) {
colMajor = true;
return true;
}
colMajor = false;
if (!lower_) {
lower_ = true;
return true;
}
lower_ = false;
if (!unit_) {
unit_ = true;
return true;
}
unit_ = false;
if (++idx_incRowA < sizeof(inc)/sizeof(ptrdiff_t)) {
return true;
}
idx_incRowA = 0;
if (++idx_incColA < sizeof(inc)/sizeof(ptrdiff_t)) {
return true;
}
idx_incColA = 0;
if (++idx_n < sizeof(dim)/sizeof(size_t)) {
return true;
}
idx_n = 0;
if (++idx_incX < sizeof(inc)/sizeof(ptrdiff_t)) {
return true;
}
idx_incX = 0;
return false;
}
void
check_dtrsv(int argc, char **argv)
{
size_t n;
ptrdiff_t incRowA, incColA, incX;
bool lower, unit;
bool more, reset = true;
unsigned seed_rand = SEED_RAND;
for (int i=0; i<argc; ++i) {
if (!strcmp(argv[i], "seed")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%u", &seed_rand)==1);
}
}
srand(seed_rand);
printf("%8s %8s %8s %8s %8s %8s %8s %8s\n",
"n", "lower", "unit", "incRowA", "incColA", "incX", "err", "res");
do {
more = get_next_param(reset, &n, &lower, &unit, &incRowA, &incColA,
&incX);
reset = false;
size_t bufsize_A = 1+n*ptrdiff_abs(incRowA) * n*ptrdiff_abs(incColA);
size_t bufsize_x = 1+n*ptrdiff_abs(incX);
double *buf_A = malloc(bufsize_A*sizeof(double));
double *buf_x0 = malloc(bufsize_x*sizeof(double));
double *buf_b = malloc(bufsize_x*sizeof(double));
double *buf_x = malloc(bufsize_x*sizeof(double));
if (!buf_A || !buf_x0 || !buf_b || !buf_x) {
abort();
}
dfill_nan(bufsize_A, buf_A);
dfill_nan(bufsize_x, buf_x0);
dfill_nan(bufsize_x, buf_b);
dfill_nan(bufsize_x, buf_x);
double *A = buf_A;
double *x0 = buf_x0;
double *b = buf_b;
double *x = buf_x;
if (incRowA<0) {
A -= n*incRowA;
}
if (incColA<0) {
A -= n*incColA;
}
if (incX<0) {
x0 -= n*incX;
b -= n*incX;
x -= n*incX;
}
randDGeMatrix(n, n, false, A, incRowA, incColA);
makeDDiagDom(n, lower, unit, A, incRowA, incColA);
randDGeMatrix(n, 1, false, x0, incX, 0);
dgecopy(n, 1, x0, incX, 0, b, incX, 0);
dtrmv_ref(n, lower, unit, A, incRowA, incColA, b, incX);
dgecopy(n, 1, b, incX, 0, x, incX, 0);
DTRSV(n, lower, unit, A, incRowA, incColA, x, incX);
double err = dtrsv_err(n, lower, unit,
A, incRowA, incColA,
b, incX,
x0, incX,
x, incX);
bool pass = err<TOL_ERR;
if (!pass) {
printf("A =\n");
printDGeMatrix(n, n, A, incRowA, incColA);
printf("b =\n");
printDGeMatrix(1, n, b, 0, incX);
printf("x0 =\n");
printDGeMatrix(1, n, x0, 0, incX);
printf("x - x0 =\n");
printfDGeMatrix("%e ", 1, n, x, 0, incX);
}
printf("%8zu %8s %8s %8td %8td %8td %7.1e %8s\n",
n,
lower ? "true" : "false",
unit ? "true" : "false",
incRowA, incColA, incX,
err, pass ? "PASS" : "FAILED");
free(buf_A);
free(buf_x0);
free(buf_b);
free(buf_x);
if (!pass) {
break;
}
} while (more);
}
#ifndef COLMAJOR
#define COLMAJOR 1
#endif
#ifndef MAX_N
#define MAX_N 1000
#endif
void
bench_dtrsv(int argc, char **argv)
{
bool colmajor = COLMAJOR;
bool lower = true;
bool unit = false;
ptrdiff_t incX = 1;
ptrdiff_t incRowA_ = 1;
ptrdiff_t incColA_ = 1;
size_t min_n = 100;
size_t inc_n = 100;
size_t max_n = MAX_N;
for (int i=0; i<argc; ++i) {
if (!strcmp(argv[i], "colmajor")) {
colmajor = true;
}
if (!strcmp(argv[i], "rowmajor")) {
colmajor = false;
}
if (!strcmp(argv[i], "lower")) {
assert(i+1<argc);
int tmp;
assert(sscanf(argv[i+1], "%d", &tmp)==1);
lower = tmp;
}
if (!strcmp(argv[i], "unit")) {
assert(i+1<argc);
int tmp;
assert(sscanf(argv[i+1], "%d", &tmp)==1);
unit = tmp;
}
if (!strcmp(argv[i], "incX")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &incX)==1);
}
if (!strcmp(argv[i], "incRowA")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &incRowA_)==1);
}
if (!strcmp(argv[i], "incColA")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &incColA_)==1);
}
if (!strcmp(argv[i], "min_n")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &min_n)==1);
}
if (!strcmp(argv[i], "max_n")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &max_n)==1);
}
if (!strcmp(argv[i], "inc_n")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &inc_n)==1);
}
}
srand(SEED_RAND);
printf("#colmajor = %d\n", colmajor);
printf("#incX = %td\n", incX);
printf("#lower = %d\n", lower);
printf("#unit = %d\n", unit);
size_t bufsize_A = max_n*ptrdiff_abs(incRowA_)
* max_n*ptrdiff_abs(incColA_) + 1;
size_t bufsize_x = max_n*ptrdiff_abs(incX) + 1;
double *buf_A = malloc(bufsize_A*sizeof(double));
double *buf_x0 = malloc(bufsize_x*sizeof(double));
double *buf_b = malloc(bufsize_x*sizeof(double));
double *buf_x = malloc(bufsize_x*sizeof(double));
if (!buf_A || !buf_x0 || !buf_b || !buf_x) {
abort();
}
printf("%8s %8s %8s %8s %8s %8s %8s\n",
"n", "incRowA", "incColA", "incX", "time", "flops", "err");
printf("\n");
for (size_t n=min_n; n<=max_n; n+=inc_n) {
double mflop = n*(n+1)/2 + (n-1)*n/2;
mflop /= 1000000.0;
ptrdiff_t incRowA = incRowA_ * (colmajor ? 1 : n);
ptrdiff_t incColA = incColA_ * (colmajor ? n : 1);
double *A = buf_A;
double *x0 = buf_x0;
double *b = buf_b;
double *x = buf_x;
if (incRowA<0) {
A -= n*incRowA;
}
if (incColA<0) {
A -= n*incColA;
}
if (incX<0) {
x0 -= n*incX;
b -= n*incX;
x -= n*incX;
}
randDGeMatrix(n, n, false, A, incRowA, incColA);
makeDDiagDom(n, lower, unit, A, incRowA, incColA);
randDGeMatrix(n, 1, false, x0, incX, 0);
double t = 0;
size_t runs = 0;
dcopy_ref(n, x0, incX, b, incX);
dtrmv_ref(n, lower, unit, A, incRowA, incColA, b, incX);
while (t<0.1 || runs<3) {
dgecopy(n, 1, b, incX, 0, x, incX, 0);
double t0 = walltime();
DTRSV(n, lower, unit, A, incRowA, incColA, x, incX);
t += walltime() - t0;
++runs;
}
t /= runs;
printf(" %8zu %8td %8td %8td %10.2lf %10.2lf",
n, incRowA, incColA, incX, t, mflop/t);
#ifndef BENCH_NOCHECK
double err = dtrsv_err(n, lower, unit,
A, incRowA, incColA,
b, incX,
x0, incX,
x, incX);
printf(" %7.1e", err);
#else
printf(" %7s", "---");
#endif
printf("\n");
fflush(stdout);
}
free(buf_A);
free(buf_x0);
free(buf_b);
free(buf_x);
}
int
main(int argc, char **argv)
{
if (argc<2 || (strcmp(argv[1], "check") && strcmp(argv[1], "bench"))) {
fprintf(stderr, "usage:\n");
fprintf(stderr, " %s check [seed]\n", argv[0]);
fprintf(stderr, " %s bench [colmajor | rowmajor]"
" [incP <value>]"
"\n", argv[0]);
return 1;
}
if (!strcmp(argv[1], "check")) {
check_dtrsv(argc-2, argv+2);
}
if (!strcmp(argv[1], "bench")) {
bench_dtrsv(argc-2, argv+2);
}
}