Content |
Overloaded Operators VS. Function Calls
There are two aspects:
-
Overloaded operators provide an expressive notation for mathematical operations.
-
Function calls have the advantage that you exactly know what is going on and how the computation gets done.
In FLENS overloaded operators just provide a convenient notation for BLAS. But both are equivalent. We first show an implementation of the conjugated gradient method that uses overloaded operators. Then we show an implementation that calls the BLAS functions directly.
Using the previously introduced debug mode you can check yourself that both implementations are doing exactly the same. It's basically a matter of taste and background what variant is more favorable for you.
Conjugated Gradient Method with Overloaded Operators
The following implementation is almost identical to the pseudo-code of the algorithm.
#define FLENS_EXAMPLES_CG_H 1
#include <flens/flens.h>
#include <cxxstd/limits.h>
template <typename MA, typename VX, typename VB>
int
cg(const MA &A, const VB &b, VX &&x,
double tol = std::numeric_limits<double>::epsilon(),
int maxIterations = std::numeric_limits<int>::max())
{
typedef typename VB::ElementType ElementType;
typedef typename VB::NoView VectorType;
ElementType alpha, beta, rNormSquare, rNormSquarePrev;
VectorType Ap, r, p;
r = b - A*x;
p = r;
rNormSquare = r*r;
for (int k=1; k<=maxIterations; ++k) {
if (sqrt(rNormSquare)<=tol) {
return k-1;
}
Ap = A*p;
alpha = rNormSquare/(p * Ap);
x += alpha*p;
r -= alpha*Ap;
rNormSquarePrev = rNormSquare;
rNormSquare = r*r;
beta = rNormSquare/rNormSquarePrev;
p = beta*p + r;
}
return maxIterations;
}
#endif // FLENS_EXAMPLES_CG_H
Conjugated Gradient Method with Explicit Function Calls
This is the same algorithm but we call the BLAS functions explicitly:
#define FLENS_EXAMPLES_CG_BLAS_H 1
#include <flens/flens.h>
#include <cxxstd/limits.h>
template <typename MA, typename VX, typename VB>
int
cg(const MA &A, const VB &b, VX &&x,
double tol = std::numeric_limits<double>::epsilon(),
int maxIterations = std::numeric_limits<int>::max())
{
using namespace flens;
typedef typename VB::ElementType ElementType;
typedef typename VB::IndexType IndexType;
typedef typename VB::NoView VectorType;
ElementType alpha, beta, rNormSquare, rNormSquarePrev;
VectorType Ap, r, p;
const ElementType Zero(0), One(1);
blas::copy(b, r);
blas::mv(NoTrans, -One, A, x, One, r);
blas::copy(r, p);
rNormSquare = blas::dot(r, r);
for (int k=1; k<=maxIterations; ++k) {
if (sqrt(rNormSquare)<=tol) {
return k-1;
}
blas::mv(NoTrans, One, A, p, Zero, Ap);
alpha = rNormSquare/blas::dot(p, Ap);
blas::axpy(alpha, p, x);
blas::axpy(-alpha, Ap, r);
rNormSquarePrev = rNormSquare;
rNormSquare = blas::dot(r, r);
beta = rNormSquare/rNormSquarePrev;
blas::scal(beta, p);
blas::axpy(One, r, p);
}
return maxIterations;
}
#endif // FLENS_EXAMPLES_CG_BLAS_H
Let's check the equivalence of overloaded operation notation and explicit function calls step-by-step:
r = b - A*x;
blas::mv(NoTrans, -One, A, x, One, r);
p = r;
rNormSquare = r*r;
Ap = A*p;
alpha = rNormSquare/(p * Ap);
x += alpha*p;
r -= alpha*Ap;
rNormSquare = r*r;
p = beta*p + r;
blas::axpy(One, r, p);
}
return maxIterations;
}
Test Example
#include <cxxstd/iostream.h>
#include <flens/flens.cxx>
#ifndef USE_CG_BLAS
#include <flens/examples/cg.h>
#else
#include <flens/examples/cg_blas.h>
#endif
using namespace flens;
using namespace std;
int
main()
{
const int n = 5;
SyMatrix<FullStorage<double> > A(n, Upper);
A.diag(0) = 2;
A.diag(1) = -1;
cout << "A = " << A << endl;
DenseVector<Array<double> > x(n), z(n), b(n);
z = 1;
b = A*z;
cg(A, b, x);
cout << "x = " << x << endl;
x -= z;
cout << "max abs error = " << abs(x(blas::iamax(x))) << std::endl;
}
Compile and Run
Just to check, we use both variants of the cg-method:
-
Using the variant with overloaded operators:
$shell> cd flens/examples $shell> g++ -Wall -std=c++11 -I../.. symatrix-cg.cc $shell> ./a.out A = 2 -1 0 0 0 -1 2 -1 0 0 0 -1 2 -1 0 0 0 -1 2 -1 0 0 0 -1 2 x = 1 1 1 1 1 max abs error = 0
-
Using the variant with explicit function calls:
$shell> cd flens/examples $shell> g++ -Wall -std=c++11 -DUSE_CG_BLAS -I../.. symatrix-cg.cc $shell> ./a.out A = 2 -1 0 0 0 -1 2 -1 0 0 0 -1 2 -1 0 0 0 -1 2 -1 0 0 0 -1 2 x = 1 1 1 1 1 max abs error = 0