#include <ulmblas/level2.h>
#include <ulmblas/level1.h>
//
// BLAS Level 2
//
#ifndef DGEMV_MxBS
#define DGEMV_MxBS 4
#endif
void
dgemv_mxBS(int m, double alpha,
const double *A, int incRowA, int incColA,
const double *x, int incX,
double *y, int incY)
{
int i, j;
if (alpha==0) {
return;
}
for (i=0; i<m; ++i) {
double dot = 0;
for (j=0; j<DGEMV_MxBS; ++j) {
dot += A[i*incRowA+j*incColA]*x[j*incX];
}
y[i*incY] += alpha*dot;
}
}
#ifndef DGEMV_BSxN
#define DGEMV_BSxN 4
#endif
void
dgemv_BSxn(int n, double alpha,
const double *A, int incRowA, int incColA,
const double *x, int incX,
double beta,
double *y, int incY)
{
int i, j;
if (alpha==0) {
return;
}
if (beta!=1) {
for (i=0; i<DGEMV_BSxN; ++i) {
y[i*incY] *= beta;
}
}
for (j=0; j<n; ++j) {
for (i=0; i<DGEMV_BSxN; ++i) {
y[i*incY] += alpha*A[i*incRowA+j*incColA]*x[j*incX];
}
}
}
//-- GEMV implementations ------------------------------------------------------
void
dgemv_unblk(int m, int n, double alpha,
const double *A, int incRowA, int incColA,
const double *x, int incX,
double beta,
double *y, int incY)
{
if (incRowA<incColA) {
int j;
dscal(m, beta, y, incY);
for (j=0; j<n; ++j) {
daxpy(m, alpha*x[j*incX], &A[j*incColA], incRowA, y, incY);
}
} else {
int i;
for (i=0; i<m; ++i) {
y[i*incY] *= beta;
y[i*incY] += alpha*ddot(n, &A[i*incRowA], incColA, x, incX);
}
}
}
void
dgemv(int m, int n, double alpha,
const double *A, int incRowA, int incColA,
const double *x, int incX,
double beta,
double *y, int incY)
{
if (incRowA<incColA) {
int bs = DGEMV_MxBS;
int j;
dscal(m, beta, y, incY);
for (j=0; j+bs<=n; j+=bs) {
dgemv_mxBS(m, alpha,
&A[j*incColA], incRowA, incColA,
&x[j*incX], incX,
y, incY);
}
dgemv_unblk(m, n-j, alpha,
&A[j*incColA], incRowA, incColA,
&x[j*incX], incX,
1.0,
y, incY);
} else {
int bs = DGEMV_BSxN;
int i;
for (i=0; i+bs<=m; i+=bs) {
dgemv_BSxn(n, alpha,
&A[i*incRowA], incRowA, incColA,
x, incX,
beta,
&y[i*incY], incY);
}
dgemv_unblk(m-i, n, alpha,
&A[i*incRowA], incRowA, incColA,
x, incX,
beta,
&y[i*incY], incY);
}
}
//-- TRMV implementations ------------------------------------------------------
void
dtrlmv_col(int n, int unitDiag,
const double *A, int incRowA, int incColA,
double *x, int incX)
{
int k;
if (unitDiag) {
for (k=n-1; k>=0; --k) {
daxpy(n-k-1, x[k*incX],
&A[(k+1)*incRowA+k*incColA], incRowA,
&x[(k+1)*incX], incX);
}
} else {
for (k=n-1; k>=0; --k) {
daxpy(n-k-1, x[k*incX],
&A[(k+1)*incRowA+k*incColA], incRowA,
&x[(k+1)*incX], incX);
x[k*incX] *= A[k*(incRowA+incColA)];
}
}
}
void
dtrlmv_row(int n, int unitDiag,
const double *A, int incRowA, int incColA,
double *x, int incX)
{
int k;
if (unitDiag) {
for (k=n-1; k>=0; --k) {
x[k*incX] += ddot(k, &A[k*incRowA], incColA, x, incX);
}
} else {
for (k=n-1; k>=0; --k) {
x[k*incX] = ddot(k+1, &A[k*incRowA], incColA, x, incX);
}
}
}
void
dtrlmv(int n, int unitDiag,
const double *A, int incRowA, int incColA,
double *x, int incX)
{
if (incRowA<incColA) {
int bs = DGEMV_MxBS;
int k = (n/bs)*bs;
dtrlmv_col(n-k, unitDiag,
&A[k*(incRowA+incColA)], incRowA, incColA,
&x[k*incX], incX);
for (k-=bs; k>=0; k-=bs) {
dgemv_mxBS(n-k-bs, 1.0,
&A[(k+bs)*incRowA+k*incColA], incRowA, incColA,
&x[k*incX], incX,
&x[(k+bs)*incX], incX);
dtrlmv_col(bs, unitDiag,
&A[k*(incRowA+incColA)], incRowA, incColA,
&x[k*incX], incX);
}
} else {
double buffer[DGEMV_BSxN];
int bs = DGEMV_BSxN;
int k = (n/bs)*bs;
dgemv(n-k, k, 1.0,
&A[k*incRowA], incRowA, incColA,
x, incX,
0.0,
buffer,1);
dtrlmv_row(n-k, unitDiag,
&A[k*(incRowA+incColA)], incRowA, incColA,
&x[k*incX], incX);
daxpy(n-k, 1.0, buffer, 1, &x[k*incX], incX);
for (k-=bs; k>=0; k-=bs) {
dgemv_BSxn(k, 1.0,
&A[k*incRowA], incRowA, incColA,
x, incX,
0.0,
buffer, 1);
dtrlmv_col(bs, unitDiag,
&A[k*(incRowA+incColA)], incRowA, incColA,
&x[k*incX], incX);
daxpy(bs, 1.0, buffer, 1, &x[k*incX], incX);
}
}
}
//-- TRSV implementations ------------------------------------------------------
void
dtrlsv_row(int n, int unitDiag,
const double *A, int incRowA, int incColA,
double *x, int incX)
{
int k;
for (k=0; k<n; ++k) {
x[k*incX] -= ddot(k, &A[k*incRowA], incColA, x, incX);
x[k*incX] /= (unitDiag) ? 1.0 : A[k*(incRowA+incColA)];
}
}
void
dtrlsv_col(int n, int unitDiag,
const double *A, int incRowA, int incColA,
double *x, int incX)
{
int k;
for (k=0; k<n; ++k) {
x[k*incX] /= (unitDiag) ? 1.0 : A[k*(incRowA+incColA)];
daxpy(n-k-1, -x[k*incX],
&A[(k+1)*incRowA+k*incColA], incRowA,
&x[(k+1)*incX], incX);
}
}
void
dtrlsv(int n, int unitDiag,
const double *A, int incRowA, int incColA,
double *x, int incX)
{
if (incRowA<incColA) {
dtrlsv_col(n, unitDiag, A, incRowA, incColA, x, incX);
} else {
dtrlsv_row(n, unitDiag, A, incRowA, incColA, x, incX);
}
}
//-- GER implementations -------------------------------------------------------
void
dger_row(int m, int n, double alpha,
const double *x, int incX,
const double *y, int incY,
double *A, int incRowA, int incColA)
{
int i;
for (i=0; i<m; ++i) {
daxpy(n, alpha*x[i*incX], y, incY, &A[i*incRowA], incColA);
}
}
void
dger_col(int m, int n, double alpha,
const double *x, int incX,
const double *y, int incY,
double *A, int incRowA, int incColA)
{
int j;
for (j=0; j<n; ++j) {
daxpy(m, alpha*y[j*incY], x, incX, &A[j*incColA], incRowA);
}
}
void
dger(int m, int n, double alpha,
const double *x, int incX,
const double *y, int incY,
double *A, int incRowA, int incColA)
{
if (incRowA<incColA) {
dger_col(m, n, alpha, x, incX, y, incY, A, incRowA, incColA);
} else {
dger_row(m, n, alpha, x, incX, y, incY, A, incRowA, incColA);
}
}