Triangular Solver (for Matrix Vector Equations)

Content

Triangular Solver (for Matrix Vector Equations)
- Skeleton for simple Example
- Exercise:
Possible Solution
More rigorous Testing and Benchmarking
Possible Solution
Applying Pivots
Possible Solution

As an example we consider the system of linear equations \(Ax = b\) with

\[A =\left(\begin{array}{ccc} 4 & 5 & 6 \\ 1 & 2 & 3 \\ 7 & 8 & 1 \\ \end{array}\right)\quad\text{and}\quad b =\left(\begin{array}{c} 32 \\ 14 \\ 26 \\ \end{array}\right)\]

An \(LU\) factorization without pivoting is given by

\[L =\left(\begin{array}{ccc} 1 & 0 & 0 \\ \frac{1}{4} & 1 & 0 \\ \frac{7}{4} & -1 & 1 \\ \end{array}\right)\quad\text{and}\quad U =\left(\begin{array}{ccc} 4 & 5 & 6 \\ 0 & \frac{3}{4} & \frac{3}{2} \\ 0 & 0 & -8 \\ \end{array}\right)\quad\text{and}\]

Hence the problem \(Ax = LUx = b\) can be solved by first computing

\[y = L^{-1} b\]

and then

\[x = U^{-1} y\]

Skeleton for simple Example

session13/simple_dtrsv_ex.c

#include <stdio.h>          // for printf()
#include <stdlib.h>         // for malloc(), free(), rand(), srand()
#include <stddef.h>         // for size_t, ptrdiff_t
#include <stdbool.h>        // for typedef bool

//-- print matrix --------------------------------------------------------------

// general matrix
void
printDGeMatrix(size_t m, size_t n,
               const double *A,
               ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t i=0; i<m; ++i) {
        for (size_t j=0; j<n; ++j) {
            printf("%9.2lf ", A[i*incRowA + j*incColA]);
        }
        printf("\n");
    }
    printf("\n");
}

// trapezoidal matrix
void
printDTrMatrix(size_t m, size_t n, bool lower, bool unit,
               const double *A,
               ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t i=0; i<m; ++i) {
        for (size_t j=0; j<n; ++j) {
            if (i==j) {
                printf("%9.2lf ", unit ? 1 : A[i*incRowA + j*incColA]);
                continue;
            }
            if ((lower && i>j) || (!lower && i<j)) {
                printf("%9.2lf ", A[i*incRowA + j*incColA]);
            } else {
                printf("%9.2lf ", 0.0);
            }
        }
        printf("\n");
    }
    printf("\n");
}

//-- required BLAS functions: daxpy, ddot, dtrsv -------------------------------

void
daxpy(size_t n, double alpha,
      const double *x, ptrdiff_t incX,
      double *y, ptrdiff_t incY)
{
    for (size_t i=0; i<n; ++i) {
        y[i*incY] += alpha*x[i*incX];
    }
}

double
ddot(size_t n,
     const double *x, ptrdiff_t incX,
     const double *y, ptrdiff_t incY)
{
    double result = 0;

    for (size_t i=0; i<n; ++i) {
        result += x[i*incX]*y[i*incY];
    }

    return result;
}

void
dtrsv(size_t n, bool lower, bool unit,
      const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
      double *x, ptrdiff_t incX)
{
    /* Your code here */
}

//-- simple test program -------------------------------------------------------

#ifndef COLMAJOR
#define COLMAJOR 1
#endif

int
main()
{
    printf("COLMAJOR = %d\n", COLMAJOR);

    size_t      n       = 3;
    ptrdiff_t   incRowA = COLMAJOR ? 1 : n+1;
    ptrdiff_t   incColA = COLMAJOR ? n : 1;
    double      *A      = malloc(n*(n+1)*sizeof(double));

    if (!A) {
        abort();
    }

    // init A handish
    A[0*incRowA + 0*incColA] =   1;
    A[0*incRowA + 1*incColA] =   2;
    A[0*incRowA + 2*incColA] =   3;
    A[0*incRowA + 3*incColA] =   4;

    A[1*incRowA + 0*incColA] =   5;
    A[1*incRowA + 1*incColA] =   6;
    A[1*incRowA + 2*incColA] =   7;
    A[1*incRowA + 3*incColA] =   8;

    A[2*incRowA + 0*incColA] =   9;
    A[2*incRowA + 1*incColA] =  10;
    A[2*incRowA + 2*incColA] =  11;
    A[2*incRowA + 3*incColA] =  12;

    printf("A =\n");
    printDGeMatrix(n, n+1, A, incRowA, incColA);

    printf("b =\n");
    printDGeMatrix(1, n, &A[n*incColA], 0, incRowA);

    printf("print A as lower unit trapezoidal matrix:\n");
    printf("L =\n");
    printDTrMatrix(n, n, true, true, A, incRowA, incColA);

    printf("print A as upper trapezoidal matrix:\n");
    printf("U =\n");
    printDTrMatrix(n, n, false, false, A, incRowA, incColA);

    //-- Solve L*y = b
    // Your code here: call dtrsv(...);

    printf("y =\n");
    printDGeMatrix(1, n, &A[n*incColA], 0, incRowA);

    //-- Solve U*x = y
    // Your code here: call dtrsv(...);

    printf("x =\n");
    printDGeMatrix(1, n, &A[n*incColA], 0, incRowA);

    free(A);
}

Exercise:

Solve the above system of linear equations with pencil and paper:
- Solve \(Ly = b\) with forward substitution.
- Solve \(Ux = y\) with backward substitution.

Make yourself familiar with the structure of the program. Compile and run it:

For testing the col major case compile it with

thales$ gcc -Wall -std=c11 -o simple_dtrsv_ex simple_dtrsv_ex.c
thales$

For testing the row major case compile it with

thales$ gcc -Wall -std=c11 -o simple_dtrsv_ex -DCOLMAJOR=0 simple_dtrsv_ex.c
thales$

Change the program such that \(A\) gets initialized as
\[A =\left(\begin{array}{rrrr}4 & 5 & 6 & 32 \\\frac{1}{4} & \frac{3}{4} & \frac{3}{2} & 14 \\\frac{7}{4} & -1 & -8 & 26 \\\end{array}\right)\]
Implement function dtrsv. You implementation should be optimized for the row major and the col major case:
- For col major cases it should be based on daxpy function calls.
- For row major cases it should be based on ddot function calls.