Verteilen mit Überlappungen

Um dem Jacobi-Verfahren noch mehr entgegenzukommen, lohnt es sich beim Verteilen und Einsammeln der Blöcke Überlappungen zu berücksichtigen.

Aufgabe

Erweitern Sie scatter_by_block und gather_by_block um den optionalen Parameter overlap, der angibt, wie umfangreich der Rand ist bzw. wieweit sich die einzelnen zu verteilenden Blöcke überlappen. Achten Sie darauf, dass bei overlap > 0 die Operationen nicht mehr symmetrisch sind, da scatter_by_block die überlappenden Bereiche mehrfach versendet und die Ränder ebenfalls verteilt, während gather_by_block nur jeweils die Innenbereiche verschickt.

template<typename MA, typename MB>
typename std::enable_if<
   hpc::matvec::IsGeMatrix<MA>::value && hpc::matvec::IsGeMatrix<MB>::value &&
      std::is_same<typename MA::ElementType, typename MB::ElementType>::value,
   int>::type
scatter_by_block(const MA& A, MB& B, int root,
      MPI_Comm grid, int dims[2], int coords[2], int overlap = 0) {
   /* ... */
}

template<typename MA, typename MB>
typename std::enable_if<
   hpc::matvec::IsGeMatrix<MA>::value && hpc::matvec::IsGeMatrix<MB>::value &&
      std::is_same<typename MA::ElementType, typename MB::ElementType>::value,
   int>::type
gather_by_block(const MA& A, MB& B, int root,
      MPI_Comm grid, int dims[2], int coords[2], int overlap = 0) {
   /* ... */
}

Vorlage

Zum Testen steht eine Vorlage zur Verfügung:

session22/scatter-gather5.cpp

#include <mpi.h>
#include <hpc/mpi/matrix.h>
#include <hpc/matvec/gematrix.h>
#include <hpc/matvec/print.h>
#include <hpc/matvec/apply.h>

int main(int argc, char** argv) {
   MPI_Init(&argc, &argv);

   int nof_processes; MPI_Comm_size(MPI_COMM_WORLD, &nof_processes);
   int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);

   using namespace hpc::matvec;
   using namespace hpc::mpi;
   using namespace hpc::aux;

   using Matrix = GeMatrix<double>;
   using Index = typename Matrix::Index;
   int share = 3;
   int num_rows = nof_processes * share + 1;
   int num_cols = nof_processes * share + 2;

   Matrix A(num_rows, num_cols); /* entire matrix */

   /* create two-dimensional Cartesian grid for our prcesses */
   int dims[2] = {0, 0}; int periods[2] = {false, false};
   MPI_Dims_create(nof_processes, 2, dims);
   MPI_Comm grid;
   MPI_Cart_create(MPI_COMM_WORLD,
      2,        // number of dimensions
      dims,     // actual dimensions
      periods,  // both dimensions are non-periodical
      true,     // reorder is permitted
      &grid     // newly created communication domain
   );
   MPI_Comm_rank(MPI_COMM_WORLD, &rank); // update rank (could have changed)

   /* get our position within the grid */
   int overlap = 1;
   int coords[2];
   MPI_Cart_coords(grid, rank, 2, coords);
   Slices<int> rows(dims[0], A.numRows - 2*overlap);
   Slices<int> columns(dims[1], A.numCols - 2*overlap);

   Matrix B(rows.size(coords[0]) + 2*overlap,
      columns.size(coords[1]) + 2*overlap,
      StorageOrder::RowMajor);

   if (rank == 0) {
      apply(A, [](double& val, Index i, Index j) -> void {
         val = i * 100 + j;
      });
   }

   scatter_by_block(A, B, 0, grid, dims, coords, overlap);
   apply(B, [=](double& val, Index i, Index j) -> void {
      val += 10000 * (rank + 1);
   });
   gather_by_block(B, A, 0, grid, dims, coords, overlap);

   MPI_Finalize();

   if (rank == 0) {
      print(A, "A");
   }
}