Sample solution

Content

/**
 * Distribute the matrix A of process `root` block-wise over the processes
 * of the two-dimensional Cartesian communicator `grid`.
 *
 * The inner part of A (A.numRows() - 2*overlap rows and
 * A.numCols() - 2*overlap columns) is split with hpc::aux::UniformSlices
 * into dims[0] x dims[1] blocks, where dims are the grid dimensions
 * reported by MPI_Cart_get. The block sent to a process starts at the
 * slice offsets of its grid coordinates and is enlarged by 2*overlap rows
 * and columns, so that adjacent blocks share their border regions.
 *
 * @param A        matrix to be distributed; only accessed on `root`
 *                 (apart from its dimensions, which every process reads)
 * @param B        local target matrix; on `root` it is filled by a local
 *                 copy, on all other processes by MPI_Recv.
 *                 NOTE(review): B presumably has to have exactly the
 *                 dimensions of the block assigned to this process; this
 *                 is not checked here.
 * @param root     rank (within `grid`) of the distributing process
 * @param grid     communicator with a two-dimensional Cartesian topology
 * @param overlap  width of the border shared between adjacent blocks
 * @return MPI_SUCCESS, or the MPI error code of a failed send, wait or
 *         receive operation (only observable when the error handler of
 *         `grid` returns error codes instead of aborting)
 */
template<typename T,
   template<typename> typename MA,
   template<typename> typename MB,
   Require<Ge<MA<T>>, Ge<MB<T>>> = true>
int scatter_by_block(const MA<T>& A, MB<T>& B, int root,
      MPI_Comm grid, unsigned int overlap = 0) {
   // NOTE(review): the slice sizes below subtract 2*overlap, so callers
   // presumably need 2*overlap <= numRows()/numCols(); confirm whether
   // this assertion should be tightened.
   assert(overlap < A.numRows() && overlap < A.numCols());

   int nof_processes; MPI_Comm_size(grid, &nof_processes);
   int rank; MPI_Comm_rank(grid, &rank);
   int dims[2]; int coords[2]; int periods[2];
   MPI_Cart_get(grid, 2, dims, periods, coords);
   hpc::aux::UniformSlices<int> rows(dims[0], A.numRows() - 2*overlap);
   hpc::aux::UniformSlices<int> columns(dims[1], A.numCols() - 2*overlap);

   int rval = MPI_SUCCESS;
   if (rank == root) {
      // One slot per process, so the array is never zero-length when the
      // grid consists of a single process; only the first `ri` slots hold
      // started requests. (Runtime-sized arrays are a compiler extension.)
      MPI_Request requests[nof_processes]; int ri = 0;
      for (int i = 0; i < nof_processes; ++i) {
         MPI_Cart_coords(grid, i, 2, coords);
         auto A_ = A.block(rows.offset(coords[0]),
            columns.offset(coords[1])).dim(
               rows.size(coords[0]) + 2*overlap,
               columns.size(coords[1]) + 2*overlap);
         if (i == root) {
            // the root's own block does not need to go through MPI
            hpc::matvec::copy(A_, B);
         } else {
            int rc = MPI_Isend(
               &A_(0, 0), 1, get_type(A_), i, 0, grid, &requests[ri]);
            if (rc == MPI_SUCCESS) {
               ++ri; // wait only for requests that were actually started
            } else {
               rval = rc;
            }
         }
      }
      for (int k = 0; k < ri; ++k) {
         MPI_Status status;
         // The error code of a single-completion call is delivered through
         // its return value; status.MPI_ERROR is not set by MPI_Wait.
         int rc = MPI_Wait(&requests[k], &status);
         if (rc != MPI_SUCCESS) {
            rval = rc;
         }
      }
   } else {
      MPI_Status status;
      rval = MPI_Recv(&B(0, 0), 1, get_type(B), root, 0, grid, &status);
   }
   return rval;
}

/**
 * Collect the local matrices A of all processes of the two-dimensional
 * Cartesian communicator `grid` into the matrix B of process `root`.
 *
 * Every process contributes the inner part of its A, i.e. A without a
 * border of `overlap` rows and columns on each side. On `root`, the inner
 * part of B (B.numRows() - 2*overlap rows and B.numCols() - 2*overlap
 * columns) is split with hpc::aux::UniformSlices into dims[0] x dims[1]
 * blocks, where dims are the grid dimensions reported by MPI_Cart_get;
 * the contribution of a process is stored in the block selected by its
 * grid coordinates, shifted by `overlap` in both directions. The outer
 * border of B is not written by this function.
 *
 * @param A        local matrix including its overlap border
 * @param B        target matrix; only written on `root` (its dimensions
 *                 are read by every process).
 *                 NOTE(review): the inner part of A presumably has to
 *                 match the dimensions of the corresponding block of B;
 *                 this is not checked here.
 * @param root     rank (within `grid`) of the collecting process
 * @param grid     communicator with a two-dimensional Cartesian topology
 * @param overlap  width of the border of A that is not transferred
 * @return MPI_SUCCESS, or the MPI error code of a failed receive, wait or
 *         send operation (only observable when the error handler of
 *         `grid` returns error codes instead of aborting)
 */
template<typename T,
   template<typename> typename MA,
   template<typename> typename MB,
   Require<Ge<MA<T>>, Ge<MB<T>>> = true>
int gather_by_block(const MA<T>& A, MB<T>& B, int root,
      MPI_Comm grid, unsigned int overlap = 0) {
   // NOTE(review): the inner part of A below subtracts 2*overlap, so
   // callers presumably need 2*overlap <= numRows()/numCols(); confirm
   // whether this assertion should be tightened.
   assert(overlap < A.numRows() && overlap < A.numCols());

   int nof_processes; MPI_Comm_size(grid, &nof_processes);
   int rank; MPI_Comm_rank(grid, &rank);
   int dims[2]; int coords[2]; int periods[2];
   MPI_Cart_get(grid, 2, dims, periods, coords);
   hpc::aux::UniformSlices<int> rows(dims[0], B.numRows() - 2*overlap);
   hpc::aux::UniformSlices<int> columns(dims[1], B.numCols() - 2*overlap);

   // inner part of the local matrix, i.e. A without its overlap border
   auto A_ = A.block(overlap, overlap).dim(
      A.numRows() - 2*overlap, A.numCols() - 2*overlap);
   int rval = MPI_SUCCESS;
   if (rank == root) {
      // One slot per process, so the array is never zero-length when the
      // grid consists of a single process; only the first `ri` slots hold
      // started requests. (Runtime-sized arrays are a compiler extension.)
      MPI_Request requests[nof_processes]; int ri = 0;
      for (int i = 0; i < nof_processes; ++i) {
         MPI_Cart_coords(grid, i, 2, coords);
         auto B_ = B.block(rows.offset(coords[0]) + overlap,
            columns.offset(coords[1]) + overlap).dim(
               rows.size(coords[0]), columns.size(coords[1]));
         if (i == root) {
            // the root's own contribution does not need to go through MPI
            hpc::matvec::copy(A_, B_);
         } else {
            int rc = MPI_Irecv(&B_(0, 0), 1, get_type(B_),
               i, 0, grid, &requests[ri]);
            if (rc == MPI_SUCCESS) {
               ++ri; // wait only for requests that were actually started
            } else {
               rval = rc;
            }
         }
      }
      for (int k = 0; k < ri; ++k) {
         MPI_Status status;
         // The error code of a single-completion call is delivered through
         // its return value; status.MPI_ERROR is not set by MPI_Wait.
         int rc = MPI_Wait(&requests[k], &status);
         if (rc != MPI_SUCCESS) {
            rval = rc;
         }
      }
   } else {
      rval = MPI_Send(&A_(0, 0), 1, get_type(A_), root, 0, grid);
   }
   return rval;
}
theon$ mpic++ -g -std=c++17 -I/home/numerik/pub/pp/ss19/lib -o scatter-gather5 scatter-gather5.cpp
theon$ mpirun -np 4 scatter-gather5
      0      1      2      3      4      5      6      7      8      9     10     11     12     13
    100  10101  10102  10103  10104  10105  10106  20107  20108  20109  20110  20111  20112    113
    200  10201  10202  10203  10204  10205  10206  20207  20208  20209  20210  20211  20212    213
    300  10301  10302  10303  10304  10305  10306  20307  20308  20309  20310  20311  20312    313
    400  10401  10402  10403  10404  10405  10406  20407  20408  20409  20410  20411  20412    413
    500  10501  10502  10503  10504  10505  10506  20507  20508  20509  20510  20511  20512    513
    600  10601  10602  10603  10604  10605  10606  20607  20608  20609  20610  20611  20612    613
    700  30701  30702  30703  30704  30705  30706  40707  40708  40709  40710  40711  40712    713
    800  30801  30802  30803  30804  30805  30806  40807  40808  40809  40810  40811  40812    813
    900  30901  30902  30903  30904  30905  30906  40907  40908  40909  40910  40911  40912    913
   1000  31001  31002  31003  31004  31005  31006  41007  41008  41009  41010  41011  41012   1013
   1100  31101  31102  31103  31104  31105  31106  41107  41108  41109  41110  41111  41112   1113
   1200   1201   1202   1203   1204   1205   1206   1207   1208   1209   1210   1211   1212   1213
theon$ 
heim$ OMPI_CXX=g++-8.3 mpic++ -g -std=c++17 -I/home/numerik/pub/pp/ss19/lib -o scatter-gather5 scatter-gather5.cpp -Wno-literal-suffix
heim$ mpirun -np 4 scatter-gather5
      0      1      2      3      4      5      6      7      8      9     10     11     12     13
    100  10101  10102  10103  10104  10105  10106  20107  20108  20109  20110  20111  20112    113
    200  10201  10202  10203  10204  10205  10206  20207  20208  20209  20210  20211  20212    213
    300  10301  10302  10303  10304  10305  10306  20307  20308  20309  20310  20311  20312    313
    400  10401  10402  10403  10404  10405  10406  20407  20408  20409  20410  20411  20412    413
    500  10501  10502  10503  10504  10505  10506  20507  20508  20509  20510  20511  20512    513
    600  10601  10602  10603  10604  10605  10606  20607  20608  20609  20610  20611  20612    613
    700  30701  30702  30703  30704  30705  30706  40707  40708  40709  40710  40711  40712    713
    800  30801  30802  30803  30804  30805  30806  40807  40808  40809  40810  40811  40812    813
    900  30901  30902  30903  30904  30905  30906  40907  40908  40909  40910  40911  40912    913
   1000  31001  31002  31003  31004  31005  31006  41007  41008  41009  41010  41011  41012   1013
   1100  31101  31102  31103  31104  31105  31106  41107  41108  41109  41110  41111  41112   1113
   1200   1201   1202   1203   1204   1205   1206   1207   1208   1209   1210   1211   1212   1213
heim$