Scatter operations with overlaps

Content

Scatter and gather operations
Sample solution
Scatter and Gather with non-uniform partitions
Sample solution
Non-blocking communication
Sample solution
Two-dimensional organization of processes
Sample solution
Scatter operations with overlaps
- Exercise
- Source code
Sample solution
Two-dimensional partitioning for the Jacobi solver
Sample solution

To accommodate the Jacobi solver, it would be helpful to support overlapping sections.

Exercise

Extend scatter_by_block and gather_by_block by adding an optional parameter overlap that specifies the size of the border. Please consider that in case of overlap > 0 the scatter and gather operations are no longer symmetrical as scatter_by_block sends overlapping regions at least twice while gather_by_block sends just the inner sections.

template<typename T,
   template<typename> class MA,
   template<typename> class MB,
   Require<Ge<MA<T>>, Ge<MB<T>>> = true>
int scatter_by_block(const MA<T>& A, MB<T>& B, int root,
      MPI_Comm grid, int dims[2], int coords[2], int overlap = 0) {
   /* ... */
}

template<typename T,
   template<typename> typename MA,
   template<typename> typename MB,
   Require<Ge<MA<T>>, Ge<MB<T>>> = true>
int gather_by_block(const MA<T>& A, MB<T>& B, int root,
      MPI_Comm grid, int dims[2], int coords[2], int overlap = 0) {
   /* ... */
}

Source code

Following source code is available for testing:

session07/scatter-gather5.cpp

#include <mpi.h>
#include <hpc/matvec/gematrix.hpp>
#include <hpc/matvec/iterators.hpp>
#include <hpc/matvec/print.hpp>
#include <hpc/mpi/matrix.hpp>

int main(int argc, char** argv) {
   MPI_Init(&argc, &argv);

   int nof_processes; MPI_Comm_size(MPI_COMM_WORLD, &nof_processes);
   int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);

   using namespace hpc::matvec;
   using namespace hpc::mpi;
   using namespace hpc::aux;

   using Matrix = GeMatrix<double>;
   int share = 3;
   int num_rows = nof_processes * share + 1;
   int num_cols = nof_processes * share + 2;

   Matrix A(num_rows, num_cols); /* entire matrix */

   /* create two-dimensional Cartesian grid for our prcesses */
   int dims[2] = {0, 0}; int periods[2] = {false, false};
   MPI_Dims_create(nof_processes, 2, dims);
   MPI_Comm grid;
   MPI_Cart_create(MPI_COMM_WORLD,
      2,        // number of dimensions
      dims,     // actual dimensions
      periods,  // both dimensions are non-periodical
      true,     // reorder is permitted
      &grid     // newly created communication domain
   );
   MPI_Comm_rank(grid, &rank); // update rank (could have changed)

   /* get our position within the grid */
   int overlap = 1;
   int coords[2];
   MPI_Cart_coords(grid, rank, 2, coords);
   UniformSlices<int> rows(dims[0], A.numRows() - 2*overlap);
   UniformSlices<int> columns(dims[1], A.numCols() - 2*overlap);

   Matrix B(rows.size(coords[0]) + 2*overlap,
      columns.size(coords[1]) + 2*overlap,
      Order::RowMajor);

   if (rank == 0) {
      for (auto [i, j, Aij]: A) {
	 Aij = i * 100 + j;
      }
   }

   scatter_by_block(A, B, 0, grid, overlap);
   for (auto [i, j, Bij]: B) {
      Bij += 10000 * (rank + 1);
      (void) i; (void) j; // suppress gcc warning
   }
   gather_by_block(B, A, 0, grid, overlap);

   MPI_Finalize();

   if (rank == 0) {
      print(A, " %6g");
   }
}