Applying Pivots

Content

In general the LU factorization requires pivoting. The pivot vector

\[p = (p_0, \dots, p_k)\]

represents a \(k \times k\) permutation matrix \(P\) such that

\[P A = L U\]

In this exercise you are supposed to implement a function dlaswp to apply \(P\) and \(P^{-1}\) to a matrix \(A\):

perform the operation \(B \leftarrow P A\) or
the operation \(B \leftarrow P^{-1}A\).

Furthermore the function should be implemented such that only a sub-range of \(p\) is used. This depends on two integer bounds \(k_0\) and \(k_1\):

if \(k_0 < k_1\) it applies the permutation represented by \(p_{k_0},\dots,p_{k_1}\) and
otherwise the permutation represented by \(p_{k_1},\dots,p_{k_0}\).

Algorithm

The function receives a matrix \(A\) with \(n\) columns, a pivot vector \(p\) and the two indices \(k_0\) and \(k_1\). The number of rows in \(A\) and the length of \(p\) are not explicitly specified.

The function implements the following algorithm:

for \(k = k_0, \dots, k_1\)
- if \(k \neq p_k\)
  - interchange in \(A\) rows with index \(k\) and \(p_k\)

Skeleton for simple Example

session13/simple_dlaswp_ex.c

#include <stdio.h>          // for printf()
#include <stdlib.h>         // for malloc(), free(), rand(), srand()
#include <stddef.h>         // for size_t, ptrdiff_t
#include <stdbool.h>        // for typedef bool

//-- print matrix --------------------------------------------------------------

// general matrix
void
printDGeMatrix(size_t m, size_t n,
               const double *A,
               ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t i=0; i<m; ++i) {
        for (size_t j=0; j<n; ++j) {
            printf("%9.2lf ", A[i*incRowA + j*incColA]);
        }
        printf("\n");
    }
    printf("\n");
}

void
printIGeMatrix(size_t m, size_t n,
               const size_t *A,
               ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t i=0; i<m; ++i) {
        for (size_t j=0; j<n; ++j) {
            printf("%6zu ", A[i*incRowA + j*incColA]);
        }
        printf("\n");
    }
    printf("\n");
}

//-- required BLAS functions: daxpy, ddot, dtrsv -------------------------------

void
dswap(size_t n, double *x, ptrdiff_t incX, double *y, ptrdiff_t incY)
{
    for (size_t i=0; i<n; ++i) {
        double tmp = x[i*incX];
        x[i*incX] = y[i*incY];
        y[i*incY] = tmp;
    }
}

void
dlaswp(size_t n, size_t k0, size_t k1,
       const size_t *p, ptrdiff_t incP,
       double *A, ptrdiff_t incRowA, ptrdiff_t incColA)
{
    /* your code here */
}

//-- simple test program -------------------------------------------------------

#ifndef COLMAJOR
#define COLMAJOR 1
#endif

int
main()
{
    printf("COLMAJOR = %d\n", COLMAJOR);

    size_t      m       = 6;
    size_t      n       = 2;
    ptrdiff_t   incRowA = COLMAJOR ? 1 : n;
    ptrdiff_t   incColA = COLMAJOR ? m : 1;
    double      *A      = malloc(m*n*sizeof(double));

    size_t      k       = 4;
    ptrdiff_t   incP    = 1;
    size_t      *p      = malloc(k*sizeof(size_t));

    if (!A || !p) {
        abort();
    }

    // init A
    for (size_t j=0; j<n; ++j) {
        for (size_t i=0; i<m; ++i) {
            A[i*incRowA + j*incColA] =   i*n + j + 1;
        }
    }

    // init p handish (only elements with index between k0 and k1 are usable)
    size_t k0 = 2;
    size_t k1 = 3;

    p[0*incP] = -1;  // <- illegal pivot
    p[1*incP] = -1;  // <- illegal pivot
    p[2*incP] = 5;
    p[3*incP] = 4;

    printf("A =\n");
    printDGeMatrix(m, n, A, incRowA, incColA);

    printf("p =\n");
    printIGeMatrix(1, k, p, 0, incP);

    printf("dlaswp with k0 = %zu, k1 = %zu\n", k0, k1);
    dlaswp(n, k0, k1, p, incP, A, incRowA, incColA);

    printf("A =\n");
    printDGeMatrix(m, n, A, incRowA, incColA);

    printf("dlaswp with k0 = %zu, k1 = %zu\n", k1, k0);
    dlaswp(n, k1, k0, p, incP, A, incRowA, incColA);

    printf("A =\n");
    printDGeMatrix(m, n, A, incRowA, incColA);

    free(A);
    free(p);
}

Exercise

Make yourself familiar with the structure of the program. Compile and run it:

For testing the col major case compile it with

thales$ gcc -Wall -std=c11 -o simple_dlaswp_ex simple_dlaswp_ex.c
thales$

For testing the row major case compile it with

thales$ gcc -Wall -std=c11 -o simple_dlaswp_ex -DCOLMAJOR=0 simple_dlaswp_ex.c
thales$

What should the program print once function dlaswp is implemented?
Implement function dlaswp and test it.