#include <ulmblas/level1.h>
#include <ulmblas/level2.h>
//-- LU factorization ----------------------------------------------------------
void
lu_unblk_var1(int m, int n, double *A, int incRowA, int incColA)
{
    int min_mn = (m<n) ? m : n;
    int max_mn = (m>n) ? m : n;
    int j;
    for (j=1; j<max_mn; ++j) {
        int k = (min_mn < j) ? min_mn : j;
        if (j<n) {
            dtrlsv(k, 1,
                   A, incRowA, incColA,
                   &A[j*incColA], incRowA);
        }
        if (j<m) {
            dtrlsv(k, 0,
                   A, incColA, incRowA,
                   &A[j*incRowA], incColA);
        }
        if (j<m && j<n) {
            A[j*(incRowA+incColA)] -= ddot(j,
                                           &A[j*incRowA], incColA,
                                           &A[j*incColA], incRowA);
        }
    }
}
void
lu_unblk_var2(int m, int n, double *A, int incRowA, int incColA)
{
    int min_mn = (m<n) ? m : n;
    int j;
    for (j=1; j<m; ++j) {
        int k = (min_mn < j) ? min_mn : j;
        if (j<m) {
            dtrlsv(k, 0,
                   A, incColA, incRowA,
                   &A[j*incRowA], incColA);
        }
        if (j<n) {
            dgemv(n-j, j,
                  -1.0,
                  &A[j*incColA], incColA, incRowA,
                  &A[j*incRowA], incColA,
                  1.0,
                  &A[j*(incRowA+incColA)], incColA);
        }
    }
}
void
lu_unblk_var3(int m, int n, double *A, int incRowA, int incColA)
{
    int min_mn = (m<n) ? m : n;
    int j;
    for (j=0; j<n; ++j) {
        int k = (min_mn < j) ? min_mn : j;
        dtrlsv(k, 1,
               A, incRowA, incColA,
               &A[j*incColA], incRowA);
        if (j<m) {
            dgemv(m-j, j,
                  -1.0,
                  &A[j*incRowA], incRowA, incColA,
                  &A[j*incColA], incRowA,
                  1.0,
                  &A[j*(incRowA+incColA)], incRowA);
            if (j+1<m) {
                dscal(m-j-1,
                      1.0/A[j*(incRowA+incColA)],
                      &A[(j+1)*incRowA+j*incColA], incRowA);
            }
        }
    }
}
void
lu_unblk_var4(int m, int n, double *A, int incRowA, int incColA)
{
    int min_mn = (m<n) ? m : n;
    int j;
    for (j=0; j<min_mn; ++j) {
        dgemv(n-j, j,
              -1.0,
              &A[j*incColA], incColA, incRowA,
              &A[j*incRowA], incColA,
              1.0,
              &A[j*(incRowA+incColA)], incColA);
        if (j+1<m) {
            dgemv(m-j-1, j,
                  -1.0,
                  &A[(j+1)*incRowA], incRowA, incColA,
                  &A[j*incColA], incRowA,
                  1.0,
                  &A[(j+1)*incRowA+j*incColA], incRowA);
            dscal(m-j-1,
                  1.0/A[j*(incRowA+incColA)],
                  &A[(j+1)*incRowA+j*incColA], incRowA);
        }
    }
}
void
lu_unblk_var5(int m, int n, double *A, int incRowA, int incColA)
{
    int min_mn = (m<n) ? m : n;
    int j;
    for (j=0; j<min_mn; ++j) {
        dscal(m-j-1,
              1.0/A[j*(incRowA+incColA)],
              &A[(j+1)*incRowA+j*incColA], incRowA);
        dger(m-j-1, n-j-1,
             -1.0,
             &A[(j+1)*incRowA+j*incColA], incRowA,
             &A[j*incRowA+(j+1)*incColA], incColA,
             &A[(j+1)*(incRowA+incColA)], incRowA, incColA);
    }
}