1
       2
       3
       4
       5
       6
       7
       8
       9
      10
      11
      12
      13
      14
      15
      16
      17
      18
      19
      20
      21
      22
      23
      24
      25
      26
      27
      28
      29
      30
      31
      32
      33
      34
      35
      36
      37
      38
      39
      40
      41
      42
      43
      44
      45
      46
      47
      48
      49
      50
      51
      52
      53
      54
      55
      56
      57
      58
      59
      60
      61
      62
      63
      64
      65
      66
      67
      68
      69
      70
      71
      72
      73
      74
      75
      76
      77
      78
      79
      80
      81
      82
      83
      84
      85
      86
      87
      88
      89
      90
      91
      92
      93
      94
      95
      96
      97
      98
      99
     100
     101
     102
     103
     104
     105
     106
     107
     108
     109
     110
     111
     112
     113
     114
     115
     116
     117
     118
     119
     120
     121
     122
     123
     124
     125
     126
     127
     128
     129
     130
     131
     132
     133
     134
     135
     136
     137
     138
     139
     140
     141
     142
     143
     144
     145
     146
     147
     148
     149
     150
     151
     152
#include <cassert>
#include <cmath>
#include <mpi.h>
#include <printf.hpp>
#include <hpc/aux/hsvcolor.hpp>
#include <hpc/aux/slices.hpp>
#include <hpc/matvec/copy.hpp>
#include <hpc/matvec/gematrix.hpp>
#include <hpc/matvec/iterators.hpp>
#include <hpc/matvec/matrix2pixbuf.hpp>
#include <hpc/mpi/matrix.hpp>
#include <hpc/mpi/vector.hpp>

template<typename T>
const T PI = std::acos(T(-1.0));
template<typename T>
const T E = std::exp(T(1.0));
template<typename T>
const T E_POWER_MINUS_PI = std::pow(E<T>, -PI<T>);

using namespace hpc;

template<typename T, template<typename> class Matrix,
   Require<Ge<Matrix<T>>> = true>
T jacobi_iteration(const Matrix<T>& A, Matrix<T>& B) {
   assert(A.numRows() > 2 && A.numCols() > 2);
   T maxdiff = 0;
   for (std::size_t i = 1; i + 1 < B.numRows(); ++i) {
      for (std::size_t j = 1; j + 1 < B.numCols(); ++j) {
	 B(i, j) = 0.25 *
	    (A(i - 1, j) + A(i + 1, j) + A(i, j - 1) + A(i, j + 1));
	 T diff = std::fabs(A(i, j) - B(i, j));
	 if (diff > maxdiff) maxdiff = diff;
      }
   }
   return maxdiff;
}

template<typename T, template<typename> class Matrix,
   Require<Ge<Matrix<T>>> = true>
void exchange_with_neighbors(Matrix<T>& A,
      /* ranks of the neighbors */
      int previous, int next,
      /* data type for an inner row, i.e. without the border */
      MPI_Datatype rowtype) {
   MPI_Status status;
   // upward
   MPI_Sendrecv(&A(1, 0), 1, rowtype, previous, 0,
      &A(A.numRows()-1, 0), 1, rowtype, next, 0,
      MPI_COMM_WORLD, &status);
   // downward
   MPI_Sendrecv(&A(A.numRows()-2, 0), 1, rowtype, next, 0,
      &A(0, 0), 1, rowtype, previous, 0,
      MPI_COMM_WORLD, &status);
}

int main(int argc, char** argv) {
   MPI_Init(&argc, &argv);

   int nof_processes; MPI_Comm_size(MPI_COMM_WORLD, &nof_processes);
   int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);

   using namespace hpc::matvec;
   using namespace hpc::mpi;
   using namespace hpc::aux;
   using T = double;
   using Matrix = GeMatrix<T>;

   /* initialize the entire matrix, including its borders */
   Matrix A(100, 100, Order::RowMajor);
   if (rank == 0) {
      for (auto [i, j, Aij]: A) {
	 if (j == 0) {
	    Aij = std::sin(PI<T> * (T(i)/(A.numRows()-1)));
	 } else if (j == A.numCols() - 1) {
	    Aij = std::sin(PI<T> * (T(i)/(A.numRows()-1))) *
	       E_POWER_MINUS_PI<T>;
	 } else {
	    Aij = 0;
	 }
      }
   }

   /* we use matrices B1 and B2 to work in our set of rows */
   UniformSlices<std::size_t> slices(nof_processes, A.numRows() - 2);
   Matrix B1(slices.size(rank) + 2, A.numCols(), Order::RowMajor);
   for (auto [i, j, Bij]: B1) {
      Bij = 0;
      (void) i; (void) j; // suppress gcc warning
   }
   auto B = B1.block(1, 0).dim(B1.numRows() - 2, B1.numCols());

   /* distribute main body of A include left and right border */
   auto A_ = A.block(1, 0).dim(A.numRows() - 2, A.numCols());
   scatter_by_row(A_, B, 0, MPI_COMM_WORLD);

   /* distribute first and last row of A */
   if (rank == 0) {
      copy(A.block(0, 0).dim(1, A.numCols()),
	 B1.block(0, 0).dim(1, B1.numCols()));
   }
   MPI_Datatype full_rowtype = get_row_type(B);
   if (nof_processes == 1) {
      copy(A.block(A.numRows()-1, 0).dim(1, A.numCols()),
	 B1.block(B.numRows()-1, 0).dim(1, B1.numCols()));
   } else if (rank == 0) {
      MPI_Send(&A(A.numRows()-1, 0), 1, full_rowtype, nof_processes-1, 0,
	 MPI_COMM_WORLD);
   } else if (rank == nof_processes - 1) {
      MPI_Status status;
      MPI_Recv(&B1(B.numRows()-1, 0), 1, full_rowtype,
	 0, 0, MPI_COMM_WORLD, &status);
   }

   Matrix B2(B1.numRows(), B1.numCols(), Order::RowMajor);
   copy(B1, B2); /* actually just the border needs to be copied */

   /* compute type for inner rows without the border */
   auto B_inner = B.block(0, 1).dim(1, A.numCols() - 2);
   MPI_Datatype inner_rowtype = get_type(B_inner);

   int previous = rank == 0? MPI_PROC_NULL: rank-1;
   int next = rank == nof_processes-1? MPI_PROC_NULL: rank+1;

   T eps = 1e-6; unsigned int iterations;
   for (iterations = 0; ; ++iterations) {
      T maxdiff = jacobi_iteration(B1, B2);
      exchange_with_neighbors(B2, previous, next, inner_rowtype);
      maxdiff = jacobi_iteration(B2, B1);
      if (iterations % 10 == 0) {
	 T global_max;
	 MPI_Reduce(&maxdiff, &global_max, 1, get_type(maxdiff),
	    MPI_MAX, 0, MPI_COMM_WORLD);
	 MPI_Bcast(&global_max, 1, get_type(maxdiff), 0, MPI_COMM_WORLD);
	 if (global_max < eps) break;
      }
      exchange_with_neighbors(B1, previous, next, inner_rowtype);
   }
   if (rank == 0) fmt::printf("%d iterations\n", iterations);

   gather_by_row(B, A_, 0, MPI_COMM_WORLD);

   MPI_Finalize();

   if (rank == 0) {
      auto pixbuf = create_pixbuf(A, [](T val) -> HSVColor<float> {
	 return HSVColor<float>((1-val) * 240, 1, 1);
      }, 8);
      gdk_pixbuf_save(pixbuf, "jacobi.jpg", "jpeg", nullptr,
	 "quality", "100", nullptr);
   }
}