Content

Overloaded Operators VS. Function Calls

There are two aspects:

In FLENS overloaded operators just provide a convenient notation for BLAS. But both are equivalent. We first show an implementation of the conjugated gradient method that uses overloaded operators. Then we show an implementation that calls the BLAS functions directly.

Using the previously introduced debug mode you can check yourself that both implementations are doing exactly the same. It's basically a matter of taste and background what variant is more favorable for you.

Conjugated Gradient Method with Overloaded Operators

The following implementation is almost identical to the pseudo-code of the algorithm.

#ifndef FLENS_EXAMPLES_CG_H
#define FLENS_EXAMPLES_CG_H 1

#include <flens/flens.h>
#include <limits>

template <typename MA, typename VX, typename VB>
int
cg(const MA &A, const VB &b, VX &&x,
   double tol = std::numeric_limits<double>::epsilon(),
   int    maxIterations = std::numeric_limits<int>::max())
{
    typedef typename VB::ElementType  ElementType;
    typedef typename VB::IndexType    IndexType;
    typedef typename VB::NoView       VectorType;

    ElementType  alpha, beta, rNormSquare, rNormSquarePrev;
    VectorType   Ap, r, p;

    r = b - A*x;
    p = r;
    rNormSquare = r*r;
    for (int k=1; k<=maxIterations; ++k) {
        if (sqrt(rNormSquare)<=tol) {
            return k-1;
        }
        Ap = A*p;
        alpha = rNormSquare/(p * Ap);
        x += alpha*p;
        r -= alpha*Ap;

        rNormSquarePrev = rNormSquare;
        rNormSquare = r*r;
        beta = rNormSquare/rNormSquarePrev;
        p = beta*p + r;
    }
    return maxIterations;
}

#endif // FLENS_EXAMPLES_CG_H

Conjugated Gradient Method with Explicit Function Calls

This is the same algorithm but we call the BLAS functions explicitly:

#ifndef FLENS_EXAMPLES_CG_BLAS_H
#define FLENS_EXAMPLES_CG_BLAS_H 1

#include <flens/flens.h>
#include <limits>

template <typename MA, typename VX, typename VB>
int
cg(const MA &A, const VB &b, VX &&x,
   double tol = std::numeric_limits<double>::epsilon(),
   int    maxIterations = std::numeric_limits<int>::max())
{
    using namespace flens;

    typedef typename VB::ElementType  ElementType;
    typedef typename VB::IndexType    IndexType;
    typedef typename VB::NoView       VectorType;

    ElementType  alpha, beta, rNormSquare, rNormSquarePrev;
    VectorType   Ap, r, p;

    const ElementType  Zero(0), One(1);

    blas::copy(b, r);
    blas::mv(NoTrans, -One, A, x, One, r);

    blas::copy(r, p);

    rNormSquare = blas::dot(r, r);

    for (int k=1; k<=maxIterations; ++k) {
        std::cout << "k = " << k << std::endl;
        if (sqrt(rNormSquare)<=tol) {
            return k-1;
        }

        blas::mv(NoTrans, One, A, p, Zero, Ap);

        alpha = rNormSquare/blas::dot(p, Ap);

        blas::axpy(alpha, p, x);

        blas::axpy(-alpha, Ap, r);

        rNormSquarePrev = rNormSquare;

        rNormSquare = blas::dot(r, r);

        beta = rNormSquare/rNormSquarePrev;

        blas::scal(beta, p);
        blas::axpy(One, r, p);
    }
    return maxIterations;
}

#endif // FLENS_EXAMPLES_CG_BLAS_H

Let's check the equivalence of overloaded operation notation and explicit function calls step-by-step:

r = b - A*x;

    blas::copy(b, r);
    blas::mv(NoTrans, -One, A, x, One, r);

p = r;

    blas::copy(r, p);

rNormSquare = r*r;

    rNormSquare = blas::dot(r, r);

Ap = A*p;

        blas::mv(NoTrans, One, A, p, Zero, Ap);

alpha = rNormSquare/(p * Ap);

        alpha = rNormSquare/blas::dot(p, Ap);

x += alpha*p;

        blas::axpy(alpha, p, x);

r -= alpha*Ap;

        blas::axpy(-alpha, Ap, r);

rNormSquare = r*r;

        rNormSquare = blas::dot(r, r);

p = beta*p + r;

        blas::scal(beta, p);
        blas::axpy(One, r, p);
    }
    return maxIterations;
}

Test Example

#include <cmath>
#include <iostream>
#include <flens/flens.cxx>

#ifndef USE_CG_BLAS
#include <flens/examples/cg.h>
#else
#include <flens/examples/cg_blas.h>
#endif

using namespace flens;
using namespace std;

int
main()
{
    const int n = 5;

    SyMatrix<FullStorage<double> >   A(n, Upper);
    A.diag(0) = 2;
    A.diag(1) = -1;

    cout << "A = " << A << endl;
    DenseVector<Array<double> > x(n), z(n), b(n);
    z = 1;
    b = A*z;

    cg(A, b, x);
    cout << "x = " << x << endl;

    x -= z;
    cout << "max abs error = " << abs(x(blas::iamax(x))) << std::endl;
}

Compile and Run

Just to check, we use both variants of the cg-method: