# Possible solution

#### Content

Basically we reuse the solution from the Jacobi solver of the last session (just without any overlap).

## Source code

#ifndef HPC_MPI_GRID_HPP
#define HPC_MPI_GRID_HPP 1

#include <mpi.h>

namespace hpc { namespace mpi {

struct Grid
{
    /* Wraps a 2D non-periodic Cartesian communicator over all
       processes of MPI_COMM_WORLD. */
    Grid()
    {
        MPI_Comm_size(MPI_COMM_WORLD, &nof_processes);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);

        /* let MPI choose a balanced 2D factorization of nof_processes */
        int gridDims[2] = {0, 0};
        int gridPeriods[2] = {false, false};
        MPI_Dims_create(nof_processes, 2, gridDims);

        MPI_Cart_create(MPI_COMM_WORLD,
                        2,            // number of dimensions
                        gridDims,     // actual dimensions
                        gridPeriods,  // both dimensions are non-periodical
                        true,         // reorder is permitted
                        &comm);       // newly created communication domain

        /* reordering may assign us a different rank in the new domain */
        MPI_Comm_rank(comm, &rank);

        numNodeRows = gridDims[0];
        numNodeCols = gridDims[1];

        /* determine our own position within the grid */
        int myCoords[2];
        MPI_Cart_coords(comm, rank, 2, myCoords);
        nodeRow = myCoords[0];
        nodeCol = myCoords[1];
    }

    int         numNodeRows, numNodeCols;
    int         nodeRow, nodeCol;
    int         nof_processes, rank;

    MPI_Comm    comm;
};

} } // namespaces mpi, hpc

#endif // HPC_MPI_GRID_HPP

#ifndef HPC_MPI_GEMATRIX_HPP
#define HPC_MPI_GEMATRIX_HPP 1

#include <cassert>
#include <memory>
#include <type_traits>
#include <mpi.h>
#include <hpc/aux/slices.hpp>
#include <hpc/matvec/gematrix.hpp>
#include <hpc/mpi/fundamental.hpp>
#include "grid.hpp"

namespace hpc { namespace mpi {

template <typename T>
struct GeMatrix
{
    /* Distributed dense matrix: the global numRows x numCols matrix is
       partitioned block-wise across the process grid; each node stores
       only its own block in `buffer`. Row/column partitioning follows
       hpc::aux::UniformSlices. */
    GeMatrix(std::size_t numRows, std::size_t numCols, Grid grid)
        : numRows(numRows), numCols(numCols),
          rows(grid.numNodeRows, numRows),
          cols(grid.numNodeCols, numCols),
          grid(grid),
          buffer(rows.size(grid.nodeRow), cols.size(grid.nodeCol))
    {
    }

    /* fetch the grid coordinates of the given rank; shared helper for
       the four per-rank accessors below (the lookup was previously
       duplicated in each of them) */
    void
    gridCoords(int rank, int (&coords)[2]) const
    {
        MPI_Cart_coords(grid.comm, rank, 2, coords);
    }

    /* row offset of the block owned by `rank` within the global matrix */
    std::size_t
    rowOffset(int rank) const
    {
        int coords[2];
        gridCoords(rank, coords);
        return rows.offset(coords[0]);
    }

    /* column offset of the block owned by `rank` within the global matrix */
    std::size_t
    colOffset(int rank) const
    {
        int coords[2];
        gridCoords(rank, coords);
        return cols.offset(coords[1]);
    }

    /* number of rows of the block owned by `rank` */
    std::size_t
    numLocalRows(int rank) const
    {
        int coords[2];
        gridCoords(rank, coords);
        return rows.size(coords[0]);
    }

    /* number of columns of the block owned by `rank` */
    std::size_t
    numLocalCols(int rank) const
    {
        int coords[2];
        gridCoords(rank, coords);
        return cols.size(coords[1]);
    }

    std::size_t                  numRows, numCols;
    hpc::aux::UniformSlices<int> rows, cols;
    Grid                         grid;
    hpc::matvec::GeMatrix<T>     buffer;
};

} } // namespaces mpi, hpc

#endif // HPC_MPI_GEMATRIX_HPP

#ifndef HPC_MPI_COPY_HPP
#define HPC_MPI_COPY_HPP 1

#include <cassert>
#include <vector>
#include <mpi.h>
#include <hpc/matvec/copy.hpp>
#include <hpc/matvec/gematrix.hpp>
#include <hpc/mpi/matrix.hpp>
#include "gematrix.hpp"

namespace hpc { namespace mpi {

// scatter: distribute blocks of the replicated matrix A (significant on
// the root only) into the local buffers of the distributed matrix B.
// Basically this is 'scatter_by_block' from session 25.
template <typename T>
void
copy(const hpc::matvec::GeMatrix<T> &A, GeMatrix<T> &B)
{
    int nof_processes = B.grid.nof_processes;
    int rank          = B.grid.rank;
    auto &rows        = B.rows;
    auto &columns     = B.cols;
    auto &grid        = B.grid.comm;

    if (rank == 0) {
        /* one request per non-root process; std::vector replaces the
           former variable-length array, which is non-standard C++ and
           had size 0 (undefined behavior) for a single-process run */
        std::vector<MPI_Request> requests;
        requests.reserve(nof_processes > 1 ? nof_processes - 1 : 0);
        for (int i = 0; i < nof_processes; ++i) {
            /* locate process i within the grid to select its block of A */
            int coords[2];
            MPI_Cart_coords(grid, i, 2, coords);

            auto A_ = A.block(rows.offset(coords[0]),
                              columns.offset(coords[1])
                      ).dim(rows.size(coords[0]),
                            columns.size(coords[1]));
            if (i == 0) {
                /* our own block is copied locally, no message needed */
                hpc::matvec::copy(A_, B.buffer);
            } else {
                /* aged OpenMPI implementations of Debian wheezy and jessie
                   expect void* instead of const void*;
                   hence we need to remove const */
                requests.push_back(MPI_REQUEST_NULL);
                MPI_Isend(
                    const_cast<void*>(reinterpret_cast<const void*>(&A_(0, 0))),
                    1, get_type(A_), i, 0, grid, &requests.back());
            }
        }
        MPI_Waitall(static_cast<int>(requests.size()), requests.data(),
                    MPI_STATUSES_IGNORE);
    } else {
        auto &B_ = B.buffer;

        MPI_Status status;
        MPI_Recv(&B_(0, 0), 1, get_type(B_), 0, 0, grid, &status);
    }
}

// gather: Basically this is 'gather_by_block' from session 25
template <typename T>
void
copy(const GeMatrix<T> &A, hpc::matvec::GeMatrix<T> &B)
{
int nof_processes = A.grid.nof_processes;
int rank          = A.grid.rank;
auto &rows        = A.rows;
auto &columns     = A.cols;

int dims[2]       = { A.grid.numNodeRows, A.grid.numNodeCols };
int coords[2]     = { A.grid.nodeRow, A.grid.nodeCol };

auto &grid        = A.grid.comm;

if (rank == 0) {
MPI_Request requests[nof_processes-1]; int ri = 0;
for (int i = 0; i < nof_processes; ++i) {
MPI_Cart_coords(grid, i, 2, coords);

auto B_ = B.block(rows.offset(coords[0]),
columns.offset(coords[1])
).dim(rows.size(coords[0]), columns.size(coords[1]));
if (i == 0) {
hpc::matvec::copy(A.buffer, B_);
} else {
MPI_Irecv(&B_(0, 0), 1, get_type(B_),
i, 0, grid, &requests[ri++]);
}
}
for (auto& request: requests) {
MPI_Status status;
MPI_Wait(&request, &status);
}
} else {
auto &A_ = A.buffer;
/* aged OpenMPI implementations of Debian wheezy and jessie
expect void* instead of const void*; hence we need to remove const */
MPI_Send(const_cast<void*>(reinterpret_cast<const void*>(&A_(0, 0))),
1, get_type(A_), 0, 0, grid);
}
}

} } // namespaces mpi, hpc

#endif // HPC_MPI_COPY_HPP


## Test run

theon$ mpic++ -g -std=c++17 -I. -I/home/numerik/pub/hpc/ws18/session25 -o test_gematrix test_gematrix.cpp
theon$ mpirun -np 4 test_gematrix
0.00    10.00    20.00    30.00    40.00    50.00    60.00    70.00    80.00    90.00
1.00    11.00    21.00    31.00    41.00    51.00    61.00    71.00    81.00    91.00
2.00    12.00    22.00    32.00    42.00    52.00    62.00    72.00    82.00    92.00
3.00    13.00    23.00    33.00    43.00    53.00    63.00    73.00    83.00    93.00
4.00    14.00    24.00    34.00    44.00    54.00    64.00    74.00    84.00    94.00
5.00    15.00    25.00    35.00    45.00    55.00    65.00    75.00    85.00    95.00
6.00    16.00    26.00    36.00    46.00    56.00    66.00    76.00    86.00    96.00
7.00    17.00    27.00    37.00    47.00    57.00    67.00    77.00    87.00    97.00
8.00    18.00    28.00    38.00    48.00    58.00    68.00    78.00    88.00    98.00
9.00    19.00    29.00    39.00    49.00    59.00    69.00    79.00    89.00    99.00

0.00    10.00    20.00    30.00    40.00    50.00    60.00    70.00    80.00    90.00
1.00    11.00    21.00    31.00    41.00    51.00    61.00    71.00    81.00    91.00
2.00    12.00    22.00    32.00    42.00    52.00    62.00    72.00    82.00    92.00
3.00    13.00    23.00    33.00    43.00    53.00    63.00    73.00    83.00    93.00
4.00    14.00    24.00    34.00    44.00    54.00    64.00    74.00    84.00    94.00
5.00    15.00    25.00    35.00    45.00    55.00    65.00    75.00    85.00    95.00
6.00    16.00    26.00    36.00    46.00    56.00    66.00    76.00    86.00    96.00
7.00    17.00    27.00    37.00    47.00    57.00    67.00    77.00    87.00    97.00
8.00    18.00    28.00    38.00    48.00    58.00    68.00    78.00    88.00    98.00
9.00    19.00    29.00    39.00    49.00    59.00    69.00    79.00    89.00    99.00
theon$