Lösungsvorschlag

#include <cassert>
#include <cstdio>
#include <mpi.h>
#include <hpc/matvec/gematrix.h>
#include <hpc/mpi/matrix.h>
#include <hpc/mpi/vector.h>
#include <hpc/matvec/apply.h>
#include <hpc/matvec/print.h>

int main(int argc, char** argv) {
   MPI_Init(&argc, &argv);

   int nof_processes; MPI_Comm_size(MPI_COMM_WORLD, &nof_processes);
   int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);
   assert(nof_processes == 2);

   using namespace hpc::matvec;
   using namespace hpc::mpi;
   using namespace hpc::aux;

   GeMatrix<double> A(3, 7, StorageOrder::ColMajor);
   GeMatrix<double> B(3, 7, StorageOrder::RowMajor);
   using Index = GeMatrix<double>::Index;
   MPI_Datatype datatype_A = get_type(A);
   MPI_Datatype datatype_B = get_type(B);

   if (rank == 0) {
      MPI_Aint true_extent; MPI_Aint true_lb;
      MPI_Type_get_true_extent(datatype_A, &true_lb, &true_extent);
      MPI_Aint extent; MPI_Aint lb;
      MPI_Type_get_extent(datatype_A, &lb, &extent);
      auto size = sizeof(double) * A.numRows * A.numCols;
      printf("true extent of A = %d\n", (int) true_extent);
      printf("extent of A = %d\n", (int) true_extent);
      printf("size = %zd\n", size);
      assert(true_extent == size);
   }

   if (rank == 0) {
      apply(A, [](double& val, Index i, Index j) -> void {
         val = i * 100 + j;
      });
      MPI_Send(&A(0, 0), 1, datatype_A, 1, 0, MPI_COMM_WORLD);
      MPI_Status status;
      MPI_Recv(&B(0, 0), 1, datatype_B, 1, 0, MPI_COMM_WORLD, &status);
      apply(A, [&](double& val, Index i, Index j) -> void {
         if (val != B(i, j)) {
            printf("verification failed for (%d,%d): %lg vs %lg\n",
               (int)i, (int) j, val, B(i, j));
         }
      });
   } else {
      MPI_Status status;
      MPI_Recv(&A(0, 0), 1, datatype_A, 0, 0, MPI_COMM_WORLD, &status);
      MPI_Send(&A(0, 0), 1, datatype_A, 0, 0, MPI_COMM_WORLD);
   }

   MPI_Finalize();
}
#ifndef HPC_MPI_MATRIX_H
#define HPC_MPI_MATRIX_H 1

#include <cassert>
#include <memory>
#include <type_traits>
#include <vector>
#include <mpi.h>
#include <hpc/matvec/gematrix.h>
#include <hpc/matvec/isgematrix.h>
#include <hpc/mpi/fundamental.h>
#include <hpc/aux/slices.h>

namespace hpc { namespace mpi {

template<typename Matrix>
typename std::enable_if<hpc::matvec::IsGeMatrix<Matrix>::value,
   MPI_Datatype>::type
get_row_type(const Matrix& A) {
   using ElementType = typename Matrix::ElementType;
   MPI_Datatype rowtype;
   MPI_Type_vector(
      /* count = */ A.numCols,
      /* blocklength = */ 1,
      /* stride = */ A.incCol,
      /* element type = */ get_type(A(0, 0)),
      /* newly created type = */ &rowtype);

   /* in case of row major we are finished */
   if (A.incRow == A.numCols) {
      MPI_Type_commit(&rowtype);
      return rowtype;
   }

   /* the extent of the MPI data type does not match
      the offset of subsequent rows -- this is a problem
      whenever we want to handle more than one row;
      to fix this we need to use the resize function
      which allows us to adapt the extent to A.incRow */
   MPI_Datatype resized_rowtype;
   MPI_Type_create_resized(rowtype, 0, /* lb remains unchanged */
      A.incRow * sizeof(ElementType), &resized_rowtype);
   MPI_Type_commit(&resized_rowtype);
   return resized_rowtype;
}

template<typename Matrix>
typename std::enable_if<hpc::matvec::IsGeMatrix<Matrix>::value,
   MPI_Datatype>::type
get_type(const Matrix& A) {
   using ElementType = typename Matrix::ElementType;
   MPI_Datatype datatype;
   if (A.incCol == 1) {
      MPI_Type_vector(
         /* count = */ A.numRows,
         /* blocklength = */ A.numCols,
         /* stride = */ A.incRow,
         /* element type = */ get_type(A(0, 0)),
         /* newly created type = */ &datatype);
   } else {
      /* vector of row vectors */
      MPI_Datatype rowtype = get_row_type(A);
      MPI_Type_contiguous(A.numRows, rowtype, &datatype);
   }
   MPI_Type_commit(&datatype);
   return datatype;
}

template<typename MA, typename MB>
typename std::enable_if<
   hpc::matvec::IsGeMatrix<MA>::value && hpc::matvec::IsGeMatrix<MB>::value &&
      std::is_same<typename MA::ElementType, typename MB::ElementType>::value,
   int>::type
scatter_by_row(const MA& A, MB& B, int root, MPI_Comm comm) {
   assert(A.numCols == B.numCols);

   int nof_processes; MPI_Comm_size(comm, &nof_processes);
   int rank; MPI_Comm_rank(comm, &rank);

   hpc::aux::Slices<int> slices(nof_processes, A.numRows);
   std::vector<int> counts(nof_processes);
   std::vector<int> offsets(nof_processes);

   MPI_Datatype rowtype_A = get_row_type(A);
   for (int i = 0; i < nof_processes; ++i) {
      if (i < A.numRows) {
         counts[i] = slices.size(i);
         offsets[i] = slices.offset(i);
      } else {
         counts[i] = 0; offsets[i] = 0;
      }
   }
   int recvcount = counts[rank];
   assert(B.numRows == recvcount);

   MPI_Datatype rowtype_B = get_row_type(B);

   /* OpenMPI implementation of Debian wheezy expects void* instead
      of const void*; hence we need to remove const */
   return MPI_Scatterv((void*) &A(0, 0), &counts[0], &offsets[0], rowtype_A,
      &B(0, 0), recvcount, rowtype_B, root, comm);
}

template<typename MA, typename MB>
typename std::enable_if<
   hpc::matvec::IsGeMatrix<MA>::value && hpc::matvec::IsGeMatrix<MB>::value &&
      std::is_same<typename MA::ElementType, typename MB::ElementType>::value,
   int>::type
gather_by_row(const MA& A, MB& B, int root, MPI_Comm comm) {
   assert(A.numCols == B.numCols);

   int nof_processes; MPI_Comm_size(comm, &nof_processes);
   int rank; MPI_Comm_rank(comm, &rank);

   hpc::aux::Slices<int> slices(nof_processes, B.numRows);
   std::vector<int> counts(nof_processes);
   std::vector<int> offsets(nof_processes);

   for (int i = 0; i < nof_processes; ++i) {
      if (i < B.numRows) {
         counts[i] = slices.size(i);
         offsets[i] = slices.offset(i);
      } else {
         counts[i] = 0; offsets[i] = 0;
      }
   }
   int sendcount = counts[rank];
   assert(A.numRows == sendcount);

   MPI_Datatype rowtype_A = get_row_type(A);
   MPI_Datatype rowtype_B = get_row_type(B);

   /* OpenMPI implementation of Debian wheezy expects void* instead
      of const void*; hence we need to remove const */
   return MPI_Gatherv((void*) &A(0, 0), sendcount, rowtype_A,
      &B(0, 0), &counts[0], &offsets[0], rowtype_B, root, comm);
}

} } // namespaces mpi, hpc

#endif // HPC_MPI_MATRIX_H