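/*
   Small MPI demonstration program: a matrix is initialized on rank 0 of a
   two-dimensional Cartesian process grid, handed out to all processes with
   scatter_by_block, modified locally by each process (an offset derived from
   the process rank is added to every element), collected again with
   gather_by_block, and finally printed.
*/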
#include <mpi.h>
#include <hpc/matvec/gematrix.hpp>
#include <hpc/matvec/iterators.hpp>
#include <hpc/matvec/print.hpp>
#include <hpc/mpi/matrix.hpp>

int main(int argc, char** argv) {
   MPI_Init(&argc, &argv);

   int nof_processes; MPI_Comm_size(MPI_COMM_WORLD, &nof_processes);
   int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);

   using namespace hpc::matvec;
   using namespace hpc::mpi;
   using namespace hpc::aux;

   using Matrix = GeMatrix<double>;
   int share = 3;
   int num_rows = nof_processes * share + 1;
   int num_cols = nof_processes * share + 2;

   Matrix A(num_rows, num_cols); /* entire matrix */

   /* create two-dimensional Cartesian grid for our prcesses */
   int dims[2] = {0, 0}; int periods[2] = {false, false};
   MPI_Dims_create(nof_processes, 2, dims);
   MPI_Comm grid;
   MPI_Cart_create(MPI_COMM_WORLD,
      2,        // number of dimensions
      dims,     // actual dimensions
      periods,  // both dimensions are non-periodical
      true,     // reorder is permitted
      &grid     // newly created communication domain
   );
   MPI_Comm_rank(grid, &rank); // update rank (could have changed)

   /* get our position within the grid */
   int overlap = 1;
   int coords[2];
   MPI_Cart_coords(grid, rank, 2, coords);
   UniformSlices<int> rows(dims[0], A.numRows() - 2*overlap);
   UniformSlices<int> columns(dims[1], A.numCols() - 2*overlap);

   Matrix B(rows.size(coords[0]) + 2*overlap,
      columns.size(coords[1]) + 2*overlap,
      Order::RowMajor);

   if (rank == 0) {
      for (auto [i, j, Aij]: A) {
	 Aij = i * 100 + j;
      }
   }

   scatter_by_block(A, B, 0, grid, overlap);
   for (auto [i, j, Bij]: B) {
      Bij += 10000 * (rank + 1);
      (void) i; (void) j; // suppress gcc warning
   }
   gather_by_block(B, A, 0, grid, overlap);

   MPI_Finalize();

   if (rank == 0) {
      print(A, " %6g");
   }
}