Sample solution

Content

template<typename T,
   template<typename> class MA,
   template<typename> class MB,
   Require<Ge<MA<T>>, Ge<MB<T>>> = true>
int scatter_by_row(const MA<T>& A, MB<T>& B, int root, MPI_Comm comm) {

   int nof_processes; MPI_Comm_size(comm, &nof_processes);
   int rank; MPI_Comm_rank(comm, &rank);

   MPI_Datatype rowtype_B = get_row_type(B);
   if (rank == root) {
      assert(A.numCols() == B.numCols());

      hpc::aux::UniformSlices<int> slices(nof_processes, A.numRows());
      std::vector<int> counts(nof_processes);
      std::vector<int> offsets(nof_processes);

      MPI_Datatype rowtype_A = get_row_type(A);
      for (int i = 0; i < nof_processes; ++i) {
         if (i < A.numRows()) {
            counts[i] = slices.size(i);
            offsets[i] = slices.offset(i);
         } else {
            counts[i] = 0; offsets[i] = 0;
         }
      }
      int recvcount = counts[rank];
      assert(B.numRows() == recvcount);

      return MPI_Scatterv(
         &A(0, 0), &counts[0], &offsets[0], rowtype_A,
         &B(0, 0), recvcount, rowtype_B, root, comm);
   } else {
      int recvcount = B.numRows();
      return MPI_Scatterv(nullptr, nullptr, nullptr, nullptr,
         &B(0, 0), recvcount, rowtype_B, root, comm);
   }
}

template<typename T,
   template<typename> class MA,
   template<typename> class MB,
   Require<Ge<MA<T>>, Ge<MB<T>>> = true>
int gather_by_row(const MA<T>& A, MB<T>& B, int root, MPI_Comm comm) {
   int nof_processes; MPI_Comm_size(comm, &nof_processes);
   int rank; MPI_Comm_rank(comm, &rank);

   MPI_Datatype rowtype_A = get_row_type(A);

   if (rank == root) {
      assert(A.numCols() == B.numCols());

      hpc::aux::UniformSlices<int> slices(nof_processes, B.numRows());
      std::vector<int> counts(nof_processes);
      std::vector<int> offsets(nof_processes);

      for (int i = 0; i < nof_processes; ++i) {
         if (i < B.numRows()) {
            counts[i] = slices.size(i);
            offsets[i] = slices.offset(i);
         } else {
            counts[i] = 0; offsets[i] = 0;
         }
      }
      int sendcount = counts[rank];
      assert(A.numRows() == sendcount);

      MPI_Datatype rowtype_B = get_row_type(B);

      return MPI_Gatherv(
         &A(0, 0), sendcount, rowtype_A,
         &B(0, 0), &counts[0], &offsets[0], rowtype_B, root, comm);
   } else {
      int sendcount = A.numRows();
      return MPI_Gatherv((void*) &A(0, 0), sendcount, rowtype_A,
         nullptr, nullptr, nullptr, nullptr, root, comm);
   }
}
theon$ mpic++ -g -std=c++17 -I/home/numerik/pub/pp/ss19/lib -o scatter-gather3 scatter-gather3.cpp
theon$ mpirun -np 4 scatter-gather3
  10000  10001  10002  10003  10004
  10100  10101  10102  10103  10104
  10200  10201  10202  10203  10204
  10300  10301  10302  10303  10304
  20400  20401  20402  20403  20404
  20500  20501  20502  20503  20504
  20600  20601  20602  20603  20604
  30700  30701  30702  30703  30704
  30800  30801  30802  30803  30804
  30900  30901  30902  30903  30904
  41000  41001  41002  41003  41004
  41100  41101  41102  41103  41104
  41200  41201  41202  41203  41204
theon$ 
heim$ OMPI_CXX=g++-8.3 mpic++ -g -std=c++17 -I/home/numerik/pub/pp/ss19/lib -o scatter-gather3 scatter-gather3.cpp -Wno-literal-suffix
heim$ mpirun -np 4 scatter-gather3
  10000  10001  10002  10003  10004
  10100  10101  10102  10103  10104
  10200  10201  10202  10203  10204
  10300  10301  10302  10303  10304
  20400  20401  20402  20403  20404
  20500  20501  20502  20503  20504
  20600  20601  20602  20603  20604
  30700  30701  30702  30703  30704
  30800  30801  30802  30803  30804
  30900  30901  30902  30903  30904
  41000  41001  41002  41003  41004
  41100  41101  41102  41103  41104
  41200  41201  41202  41203  41204
heim$