# Scatter operations with overlaps

#### Content

To accommodate the Jacobi solver, it would be helpful to support overlapping sections.

## Exercise

Extend scatter_by_block and gather_by_block by adding an optional parameter overlap that specifies the size of the border. Please consider that in case of overlap > 0 the scatter and gather operations are no longer symmetrical as scatter_by_block sends overlapping regions at least twice while gather_by_block sends just the inner sections.

template<typename T,
template<typename> class MA,
template<typename> class MB,
Require<Ge<MA<T>>, Ge<MB<T>>> = true>
int scatter_by_block(const MA<T>& A, MB<T>& B, int root,
MPI_Comm grid, int dims, int coords, int overlap = 0) {
/* ... */
}

template<typename T,
template<typename> typename MA,
template<typename> typename MB,
Require<Ge<MA<T>>, Ge<MB<T>>> = true>
int gather_by_block(const MA<T>& A, MB<T>& B, int root,
MPI_Comm grid, int dims, int coords, int overlap = 0) {
/* ... */
}


## Source code

Following source code is available for testing:

#include <mpi.h>
#include <hpc/matvec/gematrix.hpp>
#include <hpc/matvec/iterators.hpp>
#include <hpc/matvec/print.hpp>
#include <hpc/mpi/matrix.hpp>

int main(int argc, char** argv) {
MPI_Init(&argc, &argv);

int nof_processes; MPI_Comm_size(MPI_COMM_WORLD, &nof_processes);
int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);

using namespace hpc::matvec;
using namespace hpc::mpi;
using namespace hpc::aux;

using Matrix = GeMatrix<double>;
int share = 3;
int num_rows = nof_processes * share + 1;
int num_cols = nof_processes * share + 2;

Matrix A(num_rows, num_cols); /* entire matrix */

/* create two-dimensional Cartesian grid for our prcesses */
int dims = {0, 0}; int periods = {false, false};
MPI_Dims_create(nof_processes, 2, dims);
MPI_Comm grid;
MPI_Cart_create(MPI_COMM_WORLD,
2,        // number of dimensions
dims,     // actual dimensions
periods,  // both dimensions are non-periodical
true,     // reorder is permitted
&grid     // newly created communication domain
);
MPI_Comm_rank(grid, &rank); // update rank (could have changed)

/* get our position within the grid */
int overlap = 1;
int coords;
MPI_Cart_coords(grid, rank, 2, coords);
UniformSlices<int> rows(dims, A.numRows() - 2*overlap);
UniformSlices<int> columns(dims, A.numCols() - 2*overlap);

Matrix B(rows.size(coords) + 2*overlap,
columns.size(coords) + 2*overlap,
Order::RowMajor);

if (rank == 0) {
for (auto [i, j, Aij]: A) {
Aij = i * 100 + j;
}
}

scatter_by_block(A, B, 0, grid, overlap);
for (auto [i, j, Bij]: B) {
Bij += 10000 * (rank + 1);
(void) i; (void) j; // suppress gcc warning
}
gather_by_block(B, A, 0, grid, overlap);

MPI_Finalize();

if (rank == 0) {
print(A, " %6g");
}
}