1
       2
       3
       4
       5
       6
       7
       8
       9
      10
      11
      12
      13
      14
      15
      16
      17
      18
      19
      20
      21
      22
      23
      24
      25
      26
      27
      28
      29
      30
      31
      32
      33
      34
      35
      36
      37
      38
      39
      40
      41
      42
      43
      44
      45
      46
      47
      48
      49
      50
      51
      52
      53
      54
      55
      56
      57
      58
      59
      60
      61
      62
      63
      64
      65
      66
      67
      68
      69
      70
      71
      72
      73
      74
      75
      76
      77
      78
      79
      80
      81
      82
      83
      84
      85
      86
      87
      88
      89
      90
      91
      92
      93
      94
      95
      96
      97
      98
      99
     100
     101
     102
     103
     104
     105
     106
     107
     108
     109
     110
     111
     112
     113
     114
     115
     116
     117
     118
     119
     120
     121
     122
     123
     124
     125
     126
     127
     128
     129
     130
     131
     132
     133
     134
     135
     136
     137
     138
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <unistd.h>
#include <mpi.h>
#include <printf.hpp>
#include <hpc/matvec/gematrix.hpp>
#include <hpc/mpi/matrix.hpp>
#include <hpc/mpi/vector.hpp>
#include <hpc/matvec/copy.hpp>
#include <hpc/matvec/iterators.hpp>
#include <hpc/matvec/matrix2pixbuf.hpp>
#include <hpc/aux/slices.hpp>
#include <hpc/aux/hsvcolor.hpp>

const auto PI = std::acos(-1.0);
const auto E = std::exp(1.0);
const auto E_POWER_MINUS_PI = std::pow(E, -PI);

using namespace hpc;

template<typename T, template<typename> typename Matrix,
   Require<Ge<Matrix<T>>> = true>
T jacobi_iteration(const Matrix<T>& A, Matrix<T>& B) {
   assert(A.numRows() > 2 && A.numCols() > 2);
   T maxdiff = 0;
   for (std::size_t i = 1; i + 1 < B.numRows(); ++i) {
      for (std::size_t j = 1; j + 1 < B.numCols(); ++j) {
	 B(i, j) = 0.25 *
	    (A(i - 1, j) + A(i + 1, j) + A(i, j - 1) + A(i, j + 1));
	 T diff = std::fabs(A(i, j) - B(i, j));
	 if (diff > maxdiff) maxdiff = diff;
      }
   }
   return maxdiff;
}

template<typename Matrix>
void exchange_with_neighbors(Matrix& A,
      int previous, int next, MPI_Datatype rowtype) {
   MPI_Status status;
   // upward
   MPI_Sendrecv(&A(1, 0), 1, rowtype, previous, 0,
      &A(A.numRows()-1, 0), 1, rowtype, next, 0,
      MPI_COMM_WORLD, &status);
   // downward
   MPI_Sendrecv(&A(A.numRows()-2, 0), 1, rowtype, next, 0,
      &A(0, 0), 1, rowtype, previous, 0,
      MPI_COMM_WORLD, &status);
}

int main(int argc, char** argv) {
   MPI_Init(&argc, &argv);

   int nof_processes; MPI_Comm_size(MPI_COMM_WORLD, &nof_processes);
   int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);

   using namespace hpc::matvec;
   using namespace hpc::mpi;
   using namespace hpc::aux;
   using Matrix = GeMatrix<double>;

   Matrix A(100, 100, Order::RowMajor);
   if (rank == 0) {
      for (auto [i, j, Aij]: A) {
	 if (j == 0) {
	    Aij = std::sin(PI * ((double)i/(A.numRows()-1)));
	 } else if (j == A.numCols() - 1) {
	    Aij = std::sin(PI * ((double)i/(A.numRows()-1))) *
	       E_POWER_MINUS_PI;
	 } else {
	    Aij = 0;
	 }
      }
   }

   UniformSlices<std::size_t> slices(nof_processes, A.numRows() - 2);
   Matrix B1(slices.size(rank) + 2, A.numCols(), Order::RowMajor);
   for (auto [i, j, Bij]: B1) {
      Bij = 0;
   }
   auto B = B1.block(1, 0).dim(B1.numRows() - 2, B1.numCols());
   MPI_Datatype rowtype = get_row_type(B);

   /* distribute main body of A */
   auto A_ = A.block(1, 0).dim(A.numRows() - 2, A.numCols());
   scatter_by_row(A_, B, 0, MPI_COMM_WORLD);
   /* distribute first and last row of A */
   if (rank == 0) {
      copy(A.block(0, 0).dim(1, A.numCols()),
	 B1.block(0, 0).dim(1, B1.numCols()));
   }
   if (nof_processes == 1) {
      copy(A.block(A.numRows()-1, 0).dim(1, A.numCols()),
	 B1.block(B.numRows()-1, 0).dim(1, B1.numCols()));
   } else if (rank == 0) {
      MPI_Send(&A(A.numRows()-1, 0), 1, rowtype, nof_processes-1, 0,
	 MPI_COMM_WORLD);
   } else if (rank == nof_processes - 1) {
      MPI_Status status;
      MPI_Recv(&B1(B.numRows()-1, 0), 1, rowtype,
	 0, 0, MPI_COMM_WORLD, &status);
   }

   Matrix B2(B1.numRows(), B1.numCols(), Order::RowMajor);
   copy(B1, B2); /* actually just the border needs to be copied */

   int previous = rank == 0? MPI_PROC_NULL: rank-1;
   int next = rank == nof_processes-1? MPI_PROC_NULL: rank+1;

   double eps = 1e-6; unsigned int iterations;
   for (iterations = 0; ; ++iterations) {
      double maxdiff = jacobi_iteration(B1, B2);
      exchange_with_neighbors(B2, previous, next, rowtype);
      maxdiff = jacobi_iteration(B2, B1);
      if (iterations % 10 == 0) {
	 double global_max;
	 MPI_Reduce(&maxdiff, &global_max, 1, get_type(maxdiff),
	    MPI_MAX, 0, MPI_COMM_WORLD);
	 MPI_Bcast(&global_max, 1, get_type(maxdiff), 0, MPI_COMM_WORLD);
	 if (global_max < eps) break;
      }
      exchange_with_neighbors(B1, previous, next, rowtype);
   }
   if (rank == 0) fmt::printf("%d iterations\n", iterations);

   gather_by_row(B, A_, 0, MPI_COMM_WORLD);

   MPI_Finalize();

   if (rank == 0) {
      auto pixbuf = create_pixbuf(A, [](double val) -> HSVColor<double> {
	 return HSVColor<double>((1-val) * 240, 1, 1);
      }, 8);
      gdk_pixbuf_save(pixbuf, "jacobi.jpg", "jpeg", nullptr,
	 "quality", "100", nullptr);
   }
}