#include <ulmblas/level1.h>
#include <ulmblas/level2.h>
//-- LU factorization ----------------------------------------------------------
void
lu_unblk_var1(int m, int n, double *A, int incRowA, int incColA)
{
int min_mn = (m<n) ? m : n;
int max_mn = (m>n) ? m : n;
int j;
for (j=1; j<max_mn; ++j) {
int k = (min_mn < j) ? min_mn : j;
if (j<n) {
dtrlsv(k, 1,
A, incRowA, incColA,
&A[j*incColA], incRowA);
}
if (j<m) {
dtrlsv(k, 0,
A, incColA, incRowA,
&A[j*incRowA], incColA);
}
if (j<m && j<n) {
A[j*(incRowA+incColA)] -= ddot(j,
&A[j*incRowA], incColA,
&A[j*incColA], incRowA);
}
}
}
void
lu_unblk_var2(int m, int n, double *A, int incRowA, int incColA)
{
int min_mn = (m<n) ? m : n;
int j;
for (j=1; j<m; ++j) {
int k = (min_mn < j) ? min_mn : j;
if (j<m) {
dtrlsv(k, 0,
A, incColA, incRowA,
&A[j*incRowA], incColA);
}
if (j<n) {
dgemv(n-j, j,
-1.0,
&A[j*incColA], incColA, incRowA,
&A[j*incRowA], incColA,
1.0,
&A[j*(incRowA+incColA)], incColA);
}
}
}
void
lu_unblk_var3(int m, int n, double *A, int incRowA, int incColA)
{
int min_mn = (m<n) ? m : n;
int j;
for (j=0; j<n; ++j) {
int k = (min_mn < j) ? min_mn : j;
dtrlsv(k, 1,
A, incRowA, incColA,
&A[j*incColA], incRowA);
if (j<m) {
dgemv(m-j, j,
-1.0,
&A[j*incRowA], incRowA, incColA,
&A[j*incColA], incRowA,
1.0,
&A[j*(incRowA+incColA)], incRowA);
if (j+1<m) {
dscal(m-j-1,
1.0/A[j*(incRowA+incColA)],
&A[(j+1)*incRowA+j*incColA], incRowA);
}
}
}
}
void
lu_unblk_var4(int m, int n, double *A, int incRowA, int incColA)
{
int min_mn = (m<n) ? m : n;
int j;
for (j=0; j<min_mn; ++j) {
dgemv(n-j, j,
-1.0,
&A[j*incColA], incColA, incRowA,
&A[j*incRowA], incColA,
1.0,
&A[j*(incRowA+incColA)], incColA);
if (j+1<m) {
dgemv(m-j-1, j,
-1.0,
&A[(j+1)*incRowA], incRowA, incColA,
&A[j*incColA], incRowA,
1.0,
&A[(j+1)*incRowA+j*incColA], incRowA);
dscal(m-j-1,
1.0/A[j*(incRowA+incColA)],
&A[(j+1)*incRowA+j*incColA], incRowA);
}
}
}
void
lu_unblk_var5(int m, int n, double *A, int incRowA, int incColA)
{
int min_mn = (m<n) ? m : n;
int j;
for (j=0; j<min_mn; ++j) {
dscal(m-j-1,
1.0/A[j*(incRowA+incColA)],
&A[(j+1)*incRowA+j*incColA], incRowA);
dger(m-j-1, n-j-1,
-1.0,
&A[(j+1)*incRowA+j*incColA], incRowA,
&A[j*incRowA+(j+1)*incColA], incColA,
&A[(j+1)*(incRowA+incColA)], incRowA, incColA);
}
}