#include <ulmblas.h>
#include <math.h>
void
daxpy(size_t n, double alpha,
const double *x, ptrdiff_t incX,
double *y, ptrdiff_t incY)
{
for (size_t i=0; i<n; ++i) {
y[i*incY] += alpha*x[i*incX];
}
}
double
ddot(size_t n,
const double *x, ptrdiff_t incX,
const double *y, ptrdiff_t incY)
{
double result = 0;
for (size_t i=0; i<n; ++i) {
result += x[i*incX]*y[i*incY];
}
return result;
}
void
dscal(size_t n,
double alpha,
double *x, ptrdiff_t incX)
{
if (alpha==1) {
return;
}
if (alpha!=0) {
for (size_t i=0; i<n; ++i) {
x[i*incX] *= alpha;
}
} else {
for (size_t i=0; i<n; ++i) {
x[i*incX] = 0;
}
}
}
size_t
idamax(size_t n, const double *x, ptrdiff_t incX)
{
double max = 0;
size_t I = 0;
for (size_t i=0; i<n; ++i) {
if (fabs(x[i*incX])>max) {
I = i;
max = fabs(x[i*incX]);
}
}
return I;
}
void
dswap(size_t n, double *x, ptrdiff_t incX, double *y, ptrdiff_t incY)
{
for (size_t i=0; i<n; ++i) {
double tmp = x[i*incX];
x[i*incX] = y[i*incY];
y[i*incY] = tmp;
}
}
void
dger(size_t m, size_t n,
double alpha,
const double *x, ptrdiff_t incX,
const double *y, ptrdiff_t incY,
double *A, ptrdiff_t incRowA, ptrdiff_t incColA)
{
if (alpha==0 || m==0 || n==0) {
return;
}
if (incRowA>incColA) {
dger(n, m, alpha, y, incY, x, incX, A, incColA, incRowA);
return;
}
for (size_t j=0; j<n; ++j) {
for (size_t i=0; i<m; ++i) {
A[i*incRowA+j*incColA] += alpha*x[i*incX]*y[j*incY];
}
}
}
void
dtrsv(size_t n, bool lower, bool unit,
const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
double *x, ptrdiff_t incX)
{
if (lower) {
if (incRowA<incColA) {
for (size_t j=0; j<n; ++j) {
if (!unit) {
x[j*incX] /= A[j*(incRowA+incColA)];
}
daxpy(n-1-j, -x[j*incX], &A[(j+1)*incRowA+j*incColA], incRowA,
&x[(j+1)*incX], incX);
}
} else {
for (size_t j=0; j<n; ++j) {
x[j*incX] -= ddot(j, &A[j*incRowA], incColA, x, incX);
if (!unit) {
x[j*incX] /= A[j*(incRowA+incColA)];
}
}
}
} else {
if (incRowA<incColA) {
for (size_t j=n; j-- >0; ) {
if (!unit) {
x[j*incX] /= A[j*(incRowA+incColA)];
}
daxpy(j, -x[j*incX], &A[j*incColA], incRowA,
x, incX);
}
} else {
for (size_t j=n; j-- >0; ) {
x[j*incX] -= ddot(n-1-j,
&A[j*incRowA+(j+1)*incColA], incColA,
&x[(j+1)*incX], incX);
if (!unit) {
x[j*incX] /= A[j*(incRowA+incColA)];
}
}
}
}
}
ptrdiff_t
dgetrf(size_t m, size_t n,
double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
size_t *p, ptrdiff_t incP)
{
size_t k = m<n ? m : n;
for (size_t j=0; j<k; ++j) {
size_t pj = j + idamax(m-j, &A[j*incRowA + j*incColA], incRowA);
if (j!=pj) {
dswap(n, &A[j*incRowA], incColA, &A[pj*incRowA], incColA);
}
p[j*incP] = pj;
double alpha = A[j*incRowA + j*incColA];
if (alpha==0) {
return j;
} else {
alpha = 1/alpha;
}
dscal(m-1-j, alpha, &A[(j+1)*incRowA + j*incColA], incRowA);
dger(m-1-j, n-1-j, -1,
&A[(j+1)*incRowA + j *incColA], incRowA,
&A[ j *incRowA + (j+1)*incColA], incColA,
&A[(j+1)*incRowA + (j+1)*incColA], incRowA, incColA);
}
return -1;
}