Possible solution

Content

Basically we reuse the solution from the Jacobi solver of the last session (just without any overlap).

Source code

grid.hpp:

#ifndef HPC_MPI_GRID_HPP
#define HPC_MPI_GRID_HPP 1

#include <mpi.h>

namespace hpc { namespace mpi {

struct Grid
{
    Grid()
    {
        MPI_Comm_size(MPI_COMM_WORLD, &nof_processes);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        int dims[2] = {0, 0};
        int periods[2] = {false, false};
        MPI_Dims_create(nof_processes, 2, dims);
        MPI_Cart_create(MPI_COMM_WORLD,
                        2,        // number of dimensions
                        dims,     // actual dimensions
                        periods,  // both dimensions are non-periodic
                        true,     // reorder is permitted
                        &comm     // newly created communication domain
                       );
        MPI_Comm_rank(comm, &rank); // update rank (could have changed)
        numNodeRows = dims[0];
        numNodeCols = dims[1];

        /* get our position within the grid */
        int coords[2];
        MPI_Cart_coords(comm, rank, 2, coords);
        nodeRow = coords[0];
        nodeCol = coords[1];
    }

    int         numNodeRows, numNodeCols;
    int         nodeRow, nodeCol;
    int         nof_processes, rank;

    MPI_Comm    comm;
};

} } // namespaces mpi, hpc

#endif // HPC_MPI_GRID_HPP
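
For illustration, a minimal, hypothetical driver (not part of the solution) that just constructs a Grid and lets every process report where it ended up; with four processes MPI_Dims_create typically chooses a 2 x 2 grid.

// Hypothetical usage sketch of Grid: each process prints its position
// within the process grid. Include path is an assumption.
#include <cstdio>
#include <mpi.h>
#include "grid.hpp"

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    hpc::mpi::Grid grid;
    std::printf("process %d of %d sits at (%d, %d) in a %d x %d grid\n",
                grid.rank, grid.nof_processes,
                grid.nodeRow, grid.nodeCol,
                grid.numNodeRows, grid.numNodeCols);
    MPI_Finalize();
}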

gematrix.hpp:

#ifndef HPC_MPI_GEMATRIX_HPP
#define HPC_MPI_GEMATRIX_HPP 1

#include <cassert>
#include <memory>
#include <type_traits>
#include <mpi.h>
#include <hpc/aux/slices.hpp>
#include <hpc/matvec/gematrix.hpp>
#include <hpc/mpi/fundamental.hpp>
#include "grid.hpp"

namespace hpc { namespace mpi {

template <typename T>
struct GeMatrix
{
    GeMatrix(std::size_t numRows, std::size_t numCols, Grid grid)
        : numRows(numRows), numCols(numCols),
          rows(grid.numNodeRows, numRows),
          cols(grid.numNodeCols, numCols),
          grid(grid),
          buffer(rows.size(grid.nodeRow),cols.size(grid.nodeCol))
    {
    }

    std::size_t
    rowOffset(int rank) const
    {
        /* get our position within the grid */
        int coords[2];
        MPI_Cart_coords(grid.comm, rank, 2, coords);
        return rows.offset(coords[0]);
    }

    std::size_t
    colOffset(int rank) const
    {
        /* get our position within the grid */
        int coords[2];
        MPI_Cart_coords(grid.comm, rank, 2, coords);
        return cols.offset(coords[1]);
    }

    std::size_t
    numLocalRows(int rank) const
    {
        /* get our position within the grid */
        int coords[2];
        MPI_Cart_coords(grid.comm, rank, 2, coords);
        return rows.size(coords[0]);
    }

    std::size_t
    numLocalCols(int rank) const
    {
        /* get our position within the grid */
        int coords[2];
        MPI_Cart_coords(grid.comm, rank, 2, coords);
        return cols.size(coords[1]);
    }

    std::size_t                  numRows, numCols;
    hpc::aux::UniformSlices<int> rows, cols;
    Grid                         grid;
    hpc::matvec::GeMatrix<T>     buffer;
};

} } // namespaces mpi, hpc

#endif // HPC_MPI_GEMATRIX_HPP
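
To see how the distribution works out, a hedged sketch: it assumes four processes (hence a 2 x 2 grid, as in the test run below) and a 10 x 10 matrix, so hpc::aux::UniformSlices hands every process a 5 x 5 block whose position within the global matrix is given by rowOffset and colOffset.

// Sketch (assumes 4 processes forming a 2 x 2 grid): each process
// reports which block of a global 10 x 10 matrix it stores locally.
#include <cstddef>
#include <cstdio>
#include <mpi.h>
#include "grid.hpp"
#include "gematrix.hpp"

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    hpc::mpi::Grid grid;
    hpc::mpi::GeMatrix<double> A(10, 10, grid);
    std::printf("rank %d: local block %zu x %zu at offset (%zu, %zu)\n",
                grid.rank,
                A.numLocalRows(grid.rank), A.numLocalCols(grid.rank),
                A.rowOffset(grid.rank), A.colOffset(grid.rank));
    MPI_Finalize();
}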

copy.hpp:

#ifndef HPC_MPI_COPY_HPP
#define HPC_MPI_COPY_HPP 1

#include <cassert>
#include <mpi.h>
#include <hpc/matvec/gematrix.hpp>
#include <hpc/matvec/copy.hpp>
#include "gematrix.hpp"
#include <hpc/mpi/matrix.hpp>

namespace hpc { namespace mpi {

// scatter: Basically this is 'scatter_by_block' from session 25
template <typename T>
void
copy(const hpc::matvec::GeMatrix<T> &A, GeMatrix<T> &B)
{
    int nof_processes = B.grid.nof_processes;
    int rank          = B.grid.rank;
    auto &rows        = B.rows;
    auto &columns     = B.cols;

    int dims[2]       = { B.grid.numNodeRows, B.grid.numNodeCols };
    int coords[2]     = { B.grid.nodeRow, B.grid.nodeCol };

    auto &grid        = B.grid.comm;

    if (rank == 0) {
        MPI_Request requests[nof_processes-1]; int ri = 0;
        for (int i = 0; i < nof_processes; ++i) {
            MPI_Cart_coords(grid, i, 2, coords);

            auto A_ = A.block(rows.offset(coords[0]),
                              columns.offset(coords[1])
                        ).dim(rows.size(coords[0]),
                              columns.size(coords[1]));
            if (i == 0) {
                hpc::matvec::copy(A_, B.buffer);
            } else {
                /* aged OpenMPI implementations of Debian wheezy and jessie
                   expect void* instead of const void*;
                   hence we need to remove const */
                MPI_Isend(
                    const_cast<void*>(reinterpret_cast<const void*>(&A_(0, 0))),
                    1, get_type(A_), i, 0, grid, &requests[ri++]);
            }
        }
        for (auto& request: requests) {
            MPI_Status status;
            MPI_Wait(&request, &status);
        }
    } else {
        auto &B_ = B.buffer;

        MPI_Status status;
        MPI_Recv(&B_(0, 0), 1, get_type(B_), 0, 0, grid, &status);
    }
}

// gather: Basically this is 'gather_by_block' from session 25
template <typename T>
void
copy(const GeMatrix<T> &A, hpc::matvec::GeMatrix<T> &B)
{
    int nof_processes = A.grid.nof_processes;
    int rank          = A.grid.rank;
    auto &rows        = A.rows;
    auto &columns     = A.cols;

    int dims[2]       = { A.grid.numNodeRows, A.grid.numNodeCols };
    int coords[2]     = { A.grid.nodeRow, A.grid.nodeCol };

    auto &grid        = A.grid.comm;

    if (rank == 0) {
        MPI_Request requests[nof_processes-1]; int ri = 0;
        for (int i = 0; i < nof_processes; ++i) {
            MPI_Cart_coords(grid, i, 2, coords);

            auto B_ = B.block(rows.offset(coords[0]),
                              columns.offset(coords[1])
                      ).dim(rows.size(coords[0]), columns.size(coords[1]));
            if (i == 0) {
                hpc::matvec::copy(A.buffer, B_);
            } else {
                MPI_Irecv(&B_(0, 0), 1, get_type(B_),
                          i, 0, grid, &requests[ri++]);
            }
        }
        for (auto& request: requests) {
            MPI_Status status;
            MPI_Wait(&request, &status);
        }
    } else {
        auto &A_ = A.buffer;
        /* aged OpenMPI implementations of Debian wheezy and jessie
           expect void* instead of const void*; hence we need to remove const */
        MPI_Send(const_cast<void*>(reinterpret_cast<const void*>(&A_(0, 0))),
                 1, get_type(A_), 0, 0, grid);
    }
}

} } // namespaces mpi, hpc

#endif // HPC_MPI_COPY_HPP

Test run

theon$ mpic++ -g -std=c++17 -I. -I/home/numerik/pub/hpc/ws18/session25 -o test_gematrix test_gematrix.cpp
theon$ mpirun -np 4 test_gematrix
     0.00    10.00    20.00    30.00    40.00    50.00    60.00    70.00    80.00    90.00
     1.00    11.00    21.00    31.00    41.00    51.00    61.00    71.00    81.00    91.00
     2.00    12.00    22.00    32.00    42.00    52.00    62.00    72.00    82.00    92.00
     3.00    13.00    23.00    33.00    43.00    53.00    63.00    73.00    83.00    93.00
     4.00    14.00    24.00    34.00    44.00    54.00    64.00    74.00    84.00    94.00
     5.00    15.00    25.00    35.00    45.00    55.00    65.00    75.00    85.00    95.00
     6.00    16.00    26.00    36.00    46.00    56.00    66.00    76.00    86.00    96.00
     7.00    17.00    27.00    37.00    47.00    57.00    67.00    77.00    87.00    97.00
     8.00    18.00    28.00    38.00    48.00    58.00    68.00    78.00    88.00    98.00
     9.00    19.00    29.00    39.00    49.00    59.00    69.00    79.00    89.00    99.00

     0.00    10.00    20.00    30.00    40.00    50.00    60.00    70.00    80.00    90.00
     1.00    11.00    21.00    31.00    41.00    51.00    61.00    71.00    81.00    91.00
     2.00    12.00    22.00    32.00    42.00    52.00    62.00    72.00    82.00    92.00
     3.00    13.00    23.00    33.00    43.00    53.00    63.00    73.00    83.00    93.00
     4.00    14.00    24.00    34.00    44.00    54.00    64.00    74.00    84.00    94.00
     5.00    15.00    25.00    35.00    45.00    55.00    65.00    75.00    85.00    95.00
     6.00    16.00    26.00    36.00    46.00    56.00    66.00    76.00    86.00    96.00
     7.00    17.00    27.00    37.00    47.00    57.00    67.00    77.00    87.00    97.00
     8.00    18.00    28.00    38.00    48.00    58.00    68.00    78.00    88.00    98.00
     9.00    19.00    29.00    39.00    49.00    59.00    69.00    79.00    89.00    99.00
theon$
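
The driver test_gematrix.cpp is not listed above. Judging from the output (a 10 x 10 matrix with A(i,j) = i + 10*j, printed once before the scatter and once after the gather), it presumably looks roughly like the following sketch; the initialization loop, the print helper, and the formatting are assumptions, only the distributed types and the two copy overloads come from the code above.

// Hypothetical reconstruction of test_gematrix.cpp: initialize a
// 10 x 10 matrix on rank 0, scatter it, gather it back, and print it
// before and after the round trip.
#include <cstddef>
#include <cstdio>
#include <mpi.h>
#include <hpc/matvec/gematrix.hpp>
#include "grid.hpp"
#include "gematrix.hpp"
#include "copy.hpp"

const std::size_t m = 10, n = 10;

void print(const hpc::matvec::GeMatrix<double> &A)
{
    for (std::size_t i = 0; i < m; ++i) {
        for (std::size_t j = 0; j < n; ++j) {
            std::printf("%9.2lf", A(i, j));   // formatting approximated
        }
        std::printf("\n");
    }
    std::printf("\n");
}

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    hpc::mpi::Grid grid;
    hpc::mpi::GeMatrix<double> B(m, n, grid);
    hpc::matvec::GeMatrix<double> A(m, n);

    if (grid.rank == 0) {
        for (std::size_t i = 0; i < m; ++i) {
            for (std::size_t j = 0; j < n; ++j) {
                A(i, j) = i + 10.0 * j;       // matches the printed values
            }
        }
        print(A);                             // first block of output
    }

    hpc::mpi::copy(A, B);                     // scatter
    hpc::mpi::copy(B, A);                     // gather

    if (grid.rank == 0) {
        print(A);                             // second block of output
    }

    MPI_Finalize();
}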