Sample solution

Content

template<typename T,
   template<typename> class MA,
   template<typename> class MB,
   Require<Ge<MA<T>>, Ge<MB<T>>> = true>
int scatter_by_block(const MA<T>& A, MB<T>& B, int root,
      MPI_Comm grid) {
   int nof_processes; MPI_Comm_size(grid, &nof_processes);
   int rank; MPI_Comm_rank(grid, &rank);
   int dims[2]; int coords[2]; int periods[2];
   MPI_Cart_get(grid, 2, dims, periods, coords);
   hpc::aux::UniformSlices<int> rows(dims[0], A.numRows());
   hpc::aux::UniformSlices<int> columns(dims[1], A.numCols());

   if (rank == root) {
      MPI_Request requests[nof_processes-1]; int ri = 0;
      for (int i = 0; i < nof_processes; ++i) {
         MPI_Cart_coords(grid, i, 2, coords);
         auto A_ = A.block(rows.offset(coords[0]),
            columns.offset(coords[1])).dim(
               rows.size(coords[0]), columns.size(coords[1]));
         if (i == root) {
            hpc::matvec::copy(A_, B);
         } else {
            MPI_Isend(&A_(0, 0), 1, get_type(A_), i, 0, grid, &requests[ri++]);
         }
      }
      for (auto& request: requests) {
         MPI_Status status;
         MPI_Wait(&request, &status);
      }
   } else {
      MPI_Status status;
      MPI_Recv(&B(0, 0), 1, get_type(B), root, 0, grid, &status);
   }
}

template<typename T,
   template<typename> typename MA,
   template<typename> typename MB,
   Require<Ge<MA<T>>, Ge<MB<T>>> = true>
int gather_by_block(const MA<T>& A, MB<T>& B, int root, MPI_Comm grid) {
   int nof_processes; MPI_Comm_size(grid, &nof_processes);
   int rank; MPI_Comm_rank(grid, &rank);
   int dims[2]; int coords[2]; int periods[2];
   MPI_Cart_get(grid, 2, dims, periods, coords);
   hpc::aux::UniformSlices<int> rows(dims[0], B.numRows());
   hpc::aux::UniformSlices<int> columns(dims[1], B.numCols());

   if (rank == root) {
      MPI_Request requests[nof_processes-1]; int ri = 0;
      for (int i = 0; i < nof_processes; ++i) {
         MPI_Cart_coords(grid, i, 2, coords);
         auto B_ = B.block(rows.offset(coords[0]),
            columns.offset(coords[1])).dim(
               rows.size(coords[0]), columns.size(coords[1]));
         if (i == root) {
            hpc::matvec::copy(A, B_);
         } else {
            MPI_Irecv(&B_(0, 0), 1, get_type(B_),
               i, 0, grid, &requests[ri++]);
         }
      }
      for (auto& request: requests) {
         MPI_Status status;
         MPI_Wait(&request, &status);
      }
   } else {
      MPI_Send(&A(0, 0), 1, get_type(A), root, 0, grid);
   }
}
theon$ mpic++ -g -std=c++17 -I/home/numerik/pub/pp/ss19/lib -o scatter-gather4 scatter-gather4.cpp
theon$ mpirun -np 4 scatter-gather4
  10000  10001  10002  10003  10004  10005  10006  20007  20008  20009  20010  20011  20012  20013
  10100  10101  10102  10103  10104  10105  10106  20107  20108  20109  20110  20111  20112  20113
  10200  10201  10202  10203  10204  10205  10206  20207  20208  20209  20210  20211  20212  20213
  10300  10301  10302  10303  10304  10305  10306  20307  20308  20309  20310  20311  20312  20313
  10400  10401  10402  10403  10404  10405  10406  20407  20408  20409  20410  20411  20412  20413
  10500  10501  10502  10503  10504  10505  10506  20507  20508  20509  20510  20511  20512  20513
  10600  10601  10602  10603  10604  10605  10606  20607  20608  20609  20610  20611  20612  20613
  30700  30701  30702  30703  30704  30705  30706  40707  40708  40709  40710  40711  40712  40713
  30800  30801  30802  30803  30804  30805  30806  40807  40808  40809  40810  40811  40812  40813
  30900  30901  30902  30903  30904  30905  30906  40907  40908  40909  40910  40911  40912  40913
  31000  31001  31002  31003  31004  31005  31006  41007  41008  41009  41010  41011  41012  41013
  31100  31101  31102  31103  31104  31105  31106  41107  41108  41109  41110  41111  41112  41113
  31200  31201  31202  31203  31204  31205  31206  41207  41208  41209  41210  41211  41212  41213
theon$ 
heim$ OMPI_CXX=g++-8.3 mpic++ -g -std=c++17 -I/home/numerik/pub/pp/ss19/lib -o scatter-gather4 scatter-gather4.cpp -Wno-literal-suffix
heim$ mpirun -np 4 scatter-gather4
  10000  10001  10002  10003  10004  10005  10006  20007  20008  20009  20010  20011  20012  20013
  10100  10101  10102  10103  10104  10105  10106  20107  20108  20109  20110  20111  20112  20113
  10200  10201  10202  10203  10204  10205  10206  20207  20208  20209  20210  20211  20212  20213
  10300  10301  10302  10303  10304  10305  10306  20307  20308  20309  20310  20311  20312  20313
  10400  10401  10402  10403  10404  10405  10406  20407  20408  20409  20410  20411  20412  20413
  10500  10501  10502  10503  10504  10505  10506  20507  20508  20509  20510  20511  20512  20513
  10600  10601  10602  10603  10604  10605  10606  20607  20608  20609  20610  20611  20612  20613
  30700  30701  30702  30703  30704  30705  30706  40707  40708  40709  40710  40711  40712  40713
  30800  30801  30802  30803  30804  30805  30806  40807  40808  40809  40810  40811  40812  40813
  30900  30901  30902  30903  30904  30905  30906  40907  40908  40909  40910  40911  40912  40913
  31000  31001  31002  31003  31004  31005  31006  41007  41008  41009  41010  41011  41012  41013
  31100  31101  31102  31103  31104  31105  31106  41107  41108  41109  41110  41111  41112  41113
  31200  31201  31202  31203  31204  31205  31206  41207  41208  41209  41210  41211  41212  41213
heim$