# Lösungsvorschlag

## Grid Klasse

#ifndef HPC_MPI_GRID_H
#define HPC_MPI_GRID_H 1

#include <mpi.h>

namespace hpc { namespace mpi {

struct Grid
{
    /* Two-dimensional Cartesian process grid.
       The constructor arranges all processes of MPI_COMM_WORLD into a
       (numNodeRows x numNodeCols) grid; afterwards each process knows
       its own coordinates (nodeRow, nodeCol) and its rank in `comm`. */
    Grid()
    {
        int dims[2]    = {0, 0};     // let MPI_Dims_create choose both
        int periods[2] = {false, false};

        /* create two-dimensional Cartesian grid for our processes */
        MPI_Comm_size(MPI_COMM_WORLD, &nof_processes);
        MPI_Dims_create(nof_processes, 2, dims);
        MPI_Cart_create(MPI_COMM_WORLD,
            2,        // number of dimensions
            dims,     // actual dimensions
            periods,  // both dimensions are non-periodical
            true,     // reorder is permitted
            &comm     // newly created communication domain
        );
        /* as reorder is permitted, our rank within `comm` may differ
           from the rank in MPI_COMM_WORLD -- so ask the new
           communicator, not MPI_COMM_WORLD */
        MPI_Comm_rank(comm, &rank);
        numNodeRows = dims[0];
        numNodeCols = dims[1];

        /* get our position within the grid */
        int coords[2];
        MPI_Cart_coords(comm, rank, 2, coords);
        nodeRow = coords[0];
        nodeCol = coords[1];
    }

    int         numNodeRows, numNodeCols;  // grid extents
    int         nodeRow, nodeCol;          // our position in the grid
    int         nof_processes, rank;       // size of and rank in `comm`

    MPI_Comm    comm;                      // Cartesian communicator
};

} } // namespaces mpi, hpc

#endif // HPC_MPI_GRID_H


## Klasse für eine verteilte GeMatrix

#ifndef HPC_MPI_GEMATRIX_H
#define HPC_MPI_GEMATRIX_H 1

#include <cassert>
#include <memory>
#include <type_traits>
#include <mpi.h>
#include <hpc/aux/ceildiv.h>
#include <hpc/aux/slices.h>
#include <hpc/matvec/gematrix.h>
#include <hpc/matvec/isgematrix.h>
#include <hpc/matvec/copy.h>
#include <hpc/mpi/fundamental.h>
#include <hpc/mpi/grid.h>

namespace hpc { namespace mpi {

template <typename T, typename I>
struct GeMatrix
{
    /* Distributed dense matrix: a (numRows x numCols) matrix is
       partitioned block-wise across the process grid; each process
       stores only its own block in `buffer`. */
    typedef T   ElementType;
    typedef I   Index;

    using StorageOrder = hpc::matvec::StorageOrder;

    GeMatrix(Index numRows, Index numCols, Grid grid)
        : numRows(numRows), numCols(numCols), grid(grid),
          buffer(getNumRows(grid.nodeRow), getNumCols(grid.nodeCol))
    {
    }

    /// Global row index of the first row owned by grid row `nodeRow`.
    Index
    getRowOffset(Index nodeRow) const
    {
        /* mb = ceil(numRows / numNodeRows): maximal block height */
        Index mb = (numRows+grid.numNodeRows-1)/grid.numNodeRows;
        return mb*nodeRow;
    }

    /// Global column index of the first column owned by grid column `nodeCol`.
    Index
    getColOffset(Index nodeCol) const
    {
        /* nb = ceil(numCols / numNodeCols): maximal block width */
        Index nb = (numCols+grid.numNodeCols-1)/grid.numNodeCols;
        return nb*nodeCol;
    }

    /// Number of rows owned by grid row `nodeRow` (0 if nothing remains).
    /// Returns min(mb, numRows - offset) so that the blocks partition the
    /// matrix without overlap (the last grid row may get a smaller block).
    Index
    getNumRows(Index nodeRow) const
    {
        Index mb     = (numRows+grid.numNodeRows-1)/grid.numNodeRows;
        Index offset = mb*nodeRow;
        if (offset >= numRows) {
            return 0;           // more grid rows than matrix rows
        }
        Index remaining = numRows - offset;
        return remaining < mb ? remaining : mb;
    }

    /// Number of columns owned by grid column `nodeCol` (0 if nothing remains).
    Index
    getNumCols(Index nodeCol) const
    {
        Index nb     = (numCols+grid.numNodeCols-1)/grid.numNodeCols;
        Index offset = nb*nodeCol;
        if (offset >= numCols) {
            return 0;           // more grid columns than matrix columns
        }
        Index remaining = numCols - offset;
        return remaining < nb ? remaining : nb;
    }

    Index                       numRows, numCols;   // global dimensions
    Grid                        grid;
    hpc::matvec::GeMatrix<T,I>  buffer;             // our local block
};

} } // namespaces mpi, hpc

#endif // HPC_MPI_GEMATRIX_H


## Copy Funktion für Scatter und Gather

#ifndef HPC_MPI_COPY_H
#define HPC_MPI_COPY_H 1

#include <cassert>
#include <vector>
#include <mpi.h>
#include <hpc/matvec/gematrix.h>
#include <hpc/matvec/isgematrix.h>
#include <hpc/matvec/copy.h>
#include <hpc/mpi/gematrix.h>
#include <hpc/mpi/matrix.h>

namespace hpc { namespace mpi {

// scatter
// scatter: distribute matrix A (complete on the root process) block-wise
// into the distributed matrix B
template <typename T, typename Index>
void
copy(const hpc::matvec::GeMatrix<T, Index> &A, GeMatrix<T, Index> &B)
{
    if (B.grid.rank == 0) {
        /* root: send each process its block; our own block is copied
           locally.  A std::vector replaces the former variable-length
           array, which is not standard C++ (and had size 0 for a
           single-process run). */
        std::vector<MPI_Request> requests;
        requests.reserve(B.grid.nof_processes-1);
        for (int i = 0; i < B.grid.nof_processes; ++i) {
            int coords[2];
            MPI_Cart_coords(B.grid.comm, i, 2, coords);
            auto A_ = A(B.getRowOffset(coords[0]), B.getColOffset(coords[1]),
                        B.getNumRows(coords[0]), B.getNumCols(coords[1]));
            if (i == 0) {
                hpc::matvec::copy(A_, B.buffer);
            } else {
                requests.emplace_back();
                MPI_Isend((void*)&A_(0, 0), 1, get_type(A_),
                          i, 0, B.grid.comm, &requests.back());
            }
        }
        /* wait until all outstanding sends have completed */
        MPI_Waitall(int(requests.size()), requests.data(),
                    MPI_STATUSES_IGNORE);
    } else {
        /* non-root: receive our block directly into the local buffer */
        MPI_Status status;
        T &B00 = B.buffer(0, 0);
        MPI_Recv((void*)&B00, 1, get_type(B.buffer), 0, 0, B.grid.comm,
                 &status);
    }
}

// gather
template <typename T, typename Index>
void
copy(const GeMatrix<T, Index> &A, hpc::matvec::GeMatrix<T, Index> &B)
{
if (A.grid.rank == 0) {
MPI_Request requests[A.grid.nof_processes-1];
int ri = 0;
for (int i = 0; i < A.grid.nof_processes; ++i) {
int coords[2];
MPI_Cart_coords(A.grid.comm, i, 2, coords);
auto B_ = B(A.getRowOffset(coords[0]), A.getColOffset(coords[1]),
A.getNumRows(coords[0]), A.getNumCols(coords[1]));
if (i == 0) {
hpc::matvec::copy(A.buffer, B_);
} else {
MPI_Irecv(&B_(0, 0), 1, get_type(B_),
i, 0, A.grid.comm, &requests[ri++]);
}
}
for (auto& request: requests) {
MPI_Status status;
MPI_Wait(&request, &status);
}
} else {
const T &A00 = A.buffer(0, 0);
MPI_Send((void *)&A00, 1, get_type(A.buffer), 0, 0, A.grid.comm);
}
}

} } // namespaces mpi, hpc

#endif // HPC_MPI_COPY_H


## Test-Programm

#include <hpc/matvec/print.h>
#include <hpc/mpi/grid.h>
#include <hpc/mpi/gematrix.h>
#include <hpc/mpi/copy.h>
#include <mpi.h>
#include <cstdio>

int
main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);

    typedef std::size_t     Index;

    hpc::mpi::Grid          grid;

    Index numRows = 10;
    Index numCols = 10;

    hpc::matvec::GeMatrix<double, Index>    A(numRows, numCols);
    hpc::matvec::GeMatrix<double, Index>    B(numRows, numCols);
    hpc::mpi::GeMatrix<double, Index>       dA(numRows, numCols, grid);

    /* initialize and print A on the root process only; the copy
       functions use grid.rank == 0 as root, so we must test the same
       condition here -- with reorder permitted, rank 0 need not sit at
       grid position (0,0) */
    if (grid.rank == 0) {
        for (Index i=0; i<numRows; ++i) {
            for (Index j=0; j<numCols; ++j) {
                A(i,j) = i + j*numRows;
            }
        }
        hpc::matvec::print(A, "A");
    }

    // scatter
    hpc::mpi::copy(A, dA);

    // gather
    hpc::mpi::copy(dA, B);

    if (grid.rank == 0) {
        hpc::matvec::print(B, "B");
    }

    MPI_Finalize();
}


## Ausführen des Tests

$shell> mpic++ -O3 -Wall -std=c++11 -I../.. mpi_gematrix.cc
$shell> mpirun -np 4 ./a.out
A =
0.0       10.0       20.0       30.0       40.0       50.0       60.0       70.0       80.0       90.0
1.0       11.0       21.0       31.0       41.0       51.0       61.0       71.0       81.0       91.0
2.0       12.0       22.0       32.0       42.0       52.0       62.0       72.0       82.0       92.0
3.0       13.0       23.0       33.0       43.0       53.0       63.0       73.0       83.0       93.0
4.0       14.0       24.0       34.0       44.0       54.0       64.0       74.0       84.0       94.0
5.0       15.0       25.0       35.0       45.0       55.0       65.0       75.0       85.0       95.0
6.0       16.0       26.0       36.0       46.0       56.0       66.0       76.0       86.0       96.0
7.0       17.0       27.0       37.0       47.0       57.0       67.0       77.0       87.0       97.0
8.0       18.0       28.0       38.0       48.0       58.0       68.0       78.0       88.0       98.0
9.0       19.0       29.0       39.0       49.0       59.0       69.0       79.0       89.0       99.0

B =
0.0       10.0       20.0       30.0       40.0       50.0       60.0       70.0       80.0       90.0
1.0       11.0       21.0       31.0       41.0       51.0       61.0       71.0       81.0       91.0
2.0       12.0       22.0       32.0       42.0       52.0       62.0       72.0       82.0       92.0
3.0       13.0       23.0       33.0       43.0       53.0       63.0       73.0       83.0       93.0
4.0       14.0       24.0       34.0       44.0       54.0       64.0       74.0       84.0       94.0
5.0       15.0       25.0       35.0       45.0       55.0       65.0       75.0       85.0       95.0
6.0       16.0       26.0       36.0       46.0       56.0       66.0       76.0       86.0       96.0
7.0       17.0       27.0       37.0       47.0       57.0       67.0       77.0       87.0       97.0
8.0       18.0       28.0       38.0       48.0       58.0       68.0       78.0       88.0       98.0
9.0       19.0       29.0       39.0       49.0       59.0       69.0       79.0       89.0       99.0
$shell>