#include <ulmaux.h>
#include <ulmblas.h>
#include <assert.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdio.h>
#include <math.h>
#include <float.h>
#include <string.h>
#ifndef DGETRF
#define DGETRF dgetrf
#endif
#ifndef SEED_RAND
#define SEED_RAND 0
#endif
#ifndef TOL_ERR
#define TOL_ERR 1
#endif
double
dgetrf_err(size_t m, size_t n,
const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
double *LU, ptrdiff_t incRowLU, ptrdiff_t incColLU,
const size_t *p, ptrdiff_t incP)
{
if (m==0 || n==0) {
return 0;
}
size_t k = m<n ? m : n;
double *L = malloc(m*k*sizeof(double));
double *U = malloc(k*n*sizeof(double));
if (!L || !U) {
abort();
}
ptrdiff_t ldL = m;
ptrdiff_t ldU = k;
for (size_t i=0; i<m; ++i) {
for (size_t j=0; j<k; ++j) {
L[i+j*ldL] = j<i ? LU[i*incRowLU+j*incColLU]
: j==i ? 1
: 0;
}
}
for (size_t i=0; i<k; ++i) {
for (size_t j=0; j<n; ++j) {
U[i+j*ldU] = j>=i ? LU[i*incRowLU+j*incColLU]
: 0;
}
}
for (size_t i=0; i<m; ++i) {
for (size_t j=0; j<n; ++j) {
LU[i*incRowLU + j*incColLU] = 0;
for (size_t l=0; l<k; ++l) {
LU[i*incRowLU+j*incColLU] += L[i+l*ldL] * U[l+j*ldU];
}
}
}
for (size_t i=k; i-- >0; ) {
if (p[i*incP]!=i) {
dswap(n,
&LU[i*incRowLU], incColLU,
&LU[p[i*incP]*incRowLU], incColLU);
}
}
double nrmA = dgenrm_inf(m, n, A, incRowA, incColA);
dgeaxpy(m, n, -1, A, incRowA, incColA, LU, incRowLU, incColLU);
double err = dgenrm_inf(m, n, LU, incRowLU, incColLU)
/ (k*nrmA*DBL_EPSILON);
free(L);
free(U);
return err;
}
bool
get_next_param(bool reset, size_t *m, size_t *n,
ptrdiff_t *incRowA, ptrdiff_t *incColA, ptrdiff_t *incP)
{
static size_t dim[] = {5, 0, 1, 32, 31};
static ptrdiff_t inc[] = {1, 2, -1, -2};
static bool colMajor;
static size_t idx_m;
static size_t idx_n;
static size_t idx_incRowA;
static size_t idx_incColA;
static size_t idx_incP;
if (reset) {
colMajor = false;
idx_m = 0;
idx_n = 0;
idx_incRowA = 0;
idx_incColA = 0;
idx_incP = 0;
}
*m = dim[idx_m];
*n = dim[idx_n];
*incP = inc[idx_incP];
if (colMajor) {
*incRowA = inc[idx_incRowA];
*incColA = dim[idx_m]*ptrdiff_abs(inc[idx_incRowA]*inc[idx_incColA]);
} else {
*incRowA = dim[idx_n]*ptrdiff_abs(inc[idx_incRowA]*inc[idx_incColA]);
*incColA = inc[idx_incColA];
}
if (!colMajor) {
colMajor = true;
return true;
}
colMajor = false;
if (++idx_incRowA < sizeof(inc)/sizeof(ptrdiff_t)) {
return true;
}
idx_incRowA = 0;
if (++idx_incColA < sizeof(inc)/sizeof(ptrdiff_t)) {
return true;
}
idx_incColA = 0;
if (++idx_m < sizeof(dim)/sizeof(size_t)) {
return true;
}
idx_m = 0;
if (++idx_n < sizeof(dim)/sizeof(size_t)) {
return true;
}
idx_n = 0;
if (++idx_incP < sizeof(inc)/sizeof(ptrdiff_t)) {
return true;
}
idx_incP = 0;
return false;
}
int
check_dgetrf(int argc, char **argv)
{
size_t m, n;
ptrdiff_t incRowA, incColA, incP;
bool more, reset = true;
unsigned seed_rand = SEED_RAND;
for (int i=0; i<argc; ++i) {
if (!strcmp(argv[i], "seed")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%u", &seed_rand)==1);
}
}
srand(seed_rand);
printf("%8s %8s %8s %8s %8s %8s %8s\n",
"m", "n", "incRowA", "incColA", "incP", "err", "res");
do {
more = get_next_param(reset, &m, &n, &incRowA, &incColA, &incP);
reset = false;
size_t mn = m<n ? m : n;
size_t bufsize_A = 1+m*ptrdiff_abs(incRowA) * n*ptrdiff_abs(incColA);
size_t bufsize_p = 1+mn*ptrdiff_abs(incP);
double *buf_A = malloc(bufsize_A*sizeof(double));
double *buf_LU = malloc(bufsize_A*sizeof(double));
size_t *buf_p = malloc(bufsize_p*sizeof(size_t));
if (!buf_A || !buf_LU || !buf_p) {
abort();
}
dfill_nan(bufsize_A, buf_A);
dfill_nan(bufsize_A, buf_LU);
ifill_rand(bufsize_p, buf_p);
double *A = buf_A;
double *LU = buf_LU;
size_t *p = buf_p;
if (incRowA<0) {
A -= m*incRowA;
LU -= m*incRowA;
}
if (incColA<0) {
A -= n*incColA;
LU -= n*incColA;
}
if (incP<0) {
p -= mn*incP;
}
randDGeMatrix(m, n, false, A, incRowA, incColA);
dgecopy(m, n, A, incRowA, incColA, LU, incRowA, incColA);
DGETRF(m, n, LU, incRowA, incColA, p, incP);
double err = dgetrf_err(m, n,
A, incRowA, incColA,
LU, incRowA, incColA,
p, incP);
bool pass = err<TOL_ERR;
if (!pass) {
printf("A =\n");
printDGeMatrix(m, n, A, incRowA, incColA);
printf("p=\n");
printIGeMatrix(1, mn, p, 0, incP);
printf("L*U - A = \n");
printfDGeMatrix("%10.1e ", m, n, LU, incRowA, incColA);
}
printf("%8zu %8zu %8td %8td %8td %7.1e %8s\n",
m, n, incRowA, incColA, incP,
err, pass ? "PASS" : "FAILED");
free(buf_A);
free(buf_LU);
free(buf_p);
if (!pass) {
return 1;
}
} while (more);
return 0;
}
#ifndef COLMAJOR
#define COLMAJOR 1
#endif
#ifndef MAX_M
#define MAX_M 1000
#endif
#ifndef MAX_N
#define MAX_N 1000
#endif
#ifndef ALPHA
#define ALPHA 1
#endif
#ifndef BETA
#define BETA 1
#endif
void
bench_dgetrf(int argc, char **argv)
{
bool colmajor = COLMAJOR;
ptrdiff_t incP = 1;
ptrdiff_t incRowA_ = 1;
ptrdiff_t incColA_ = 1;
size_t min_m = 100;
size_t min_n = 100;
size_t inc_m = 100;
size_t inc_n = 100;
size_t max_m = MAX_M;
size_t max_n = MAX_N;
for (int i=0; i<argc; ++i) {
if (!strcmp(argv[i], "colmajor")) {
colmajor = true;
}
if (!strcmp(argv[i], "rowmajor")) {
colmajor = false;
}
if (!strcmp(argv[i], "incP")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &incP)==1);
}
if (!strcmp(argv[i], "incRowA")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &incRowA_)==1);
}
if (!strcmp(argv[i], "incColA")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &incColA_)==1);
}
if (!strcmp(argv[i], "min_m")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &min_m)==1);
}
if (!strcmp(argv[i], "min_n")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &min_n)==1);
}
if (!strcmp(argv[i], "max_m")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &max_m)==1);
}
if (!strcmp(argv[i], "max_n")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &max_n)==1);
}
if (!strcmp(argv[i], "inc_m")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &inc_m)==1);
}
if (!strcmp(argv[i], "inc_n")) {
assert(i+1<argc);
assert(sscanf(argv[i+1], "%td", &inc_n)==1);
}
}
srand(SEED_RAND);
printf("#colmajor = %d\n", colmajor);
printf("#incP = %td\n", incP);
size_t min_mn = max_m<max_n ? max_m : max_n;
size_t bufsize_A = max_m*ptrdiff_abs(incRowA_)
* max_n*ptrdiff_abs(incColA_) + 1;
size_t bufsize_p = min_mn*ptrdiff_abs(incP) + 1;
double *buf_A = malloc(bufsize_A*sizeof(double));
double *buf_LU = malloc(bufsize_A*sizeof(double));
size_t *buf_p = malloc(bufsize_p*sizeof(size_t));
if (!buf_A || !buf_LU || !buf_p) {
abort();
}
printf("#%7s %7s %7s %7s ", "m", "n", "incRowA", "incColA");
printf("%10s %10s %7s", "time", "mflops", "err");
printf("\n");
for (size_t m=min_m, n=min_n; m<=max_m && n<=max_n; m+=inc_m, n+=inc_n) {
size_t max_mn = m<n ? n : m;
size_t min_mn = m<n ? m : n;
double mflop = max_mn * min_mn * min_mn
-(min_mn * min_mn * min_mn) / 3.0
-(min_mn * min_mn) / 2.0;
mflop /= 1000000.0;
ptrdiff_t incRowA = incRowA_ * (colmajor ? 1 : n);
ptrdiff_t incColA = incColA_ * (colmajor ? m : 1);
double *A = buf_A;
double *LU = buf_LU;
size_t *p = buf_p;
if (incRowA<0) {
A -= m*incRowA;
LU -= m*incRowA;
}
if (incColA<0) {
A -= n*incColA;
LU -= n*incColA;
}
if (incP<0) {
p -= min_mn*incP;
}
randDGeMatrix(m, n, false, A, incRowA, incColA);
ifill_rand(bufsize_p, buf_p);
double t = 0;
size_t runs = 0;
while (t<0.5 || runs<3) {
dgecopy(m, n, A, incRowA, incColA, LU, incRowA, incColA);
double t0 = walltime();
DGETRF(m, n, LU, incRowA, incColA, p, incP);
t += walltime() - t0;
++runs;
}
t /= runs;
printf(" %7zu %7zu %7td %7td %10.2lf %10.2lf",
m, n, incRowA, incColA, t, mflop/t);
#ifndef BENCH_NOCHECK
double err = dgetrf_err(m, n,
A, incRowA, incColA,
LU, incRowA, incColA,
p, incP);
printf(" %7.1e", err);
#else
printf(" %7s", "---");
#endif
printf("\n");
fflush(stdout);
}
free(buf_A);
free(buf_LU);
free(buf_p);
}
int
main(int argc, char **argv)
{
if (argc<2 || (strcmp(argv[1], "check") && strcmp(argv[1], "bench"))) {
fprintf(stderr, "usage:\n");
fprintf(stderr, " %s check [seed]\n", argv[0]);
fprintf(stderr, " %s bench [colmajor | rowmajor]"
" [incP <value>]"
"\n", argv[0]);
return 1;
}
if (!strcmp(argv[1], "check")) {
return check_dgetrf(argc-2, argv+2);
}
if (!strcmp(argv[1], "bench")) {
bench_dgetrf(argc-2, argv+2);
}
}