#include <cassert>
#include <cmath>
#include <mpi.h>
#include <gdk-pixbuf/gdk-pixbuf.h>
/* headers of the course's hpc library and the fmt printf wrapper;
   the exact paths are assumptions, the original names were lost */
#include <printf.hpp>                 /* fmt::printf */
#include <hpc/matvec/gematrix.h>      /* GeMatrix, StorageOrder */
#include <hpc/matvec/apply.h>         /* apply */
#include <hpc/matvec/copy.h>          /* copy */
#include <hpc/matvec/matrix2pixbuf.h> /* create_pixbuf */
#include <hpc/mpi/matrix.h>           /* scatter_by_block, gather_by_block */
#include <hpc/mpi/fundamental.h>      /* get_type */
#include <hpc/aux/slices.h>           /* Slices */
#include <hpc/aux/hsvcolor.h>         /* HSVColor */

constexpr auto PI = std::acos(-1.0);
constexpr auto E = std::exp(1.0);
constexpr auto E_POWER_MINUS_PI = std::pow(E, -PI);

/* used to specify an I/O transfer with one of our neighbors;
   each process has four input and four output operations per iteration */
struct IOTransfer {
   int rank;              /* rank of neighbor */
   void* addr;            /* start address */
   MPI_Datatype datatype; /* either a column or row */
};

/* perform a Jacobi iteration:
      A: previous state, i.e. A_n
      B: to be computed state A_{n+1}
      in_vectors & out_vectors: I/O operations with our four neighbors,
         reading into B (outer border) and sending from B (inner border)
*/
template<typename Matrix>
typename Matrix::ElementType jacobi_iteration(const Matrix& A, Matrix& B,
      const IOTransfer (&in_vectors)[4], const IOTransfer (&out_vectors)[4]) {
   using ElementType = typename Matrix::ElementType;
   using Index = typename Matrix::Index;
   assert(A.numRows > 2 && A.numCols > 2);
   ElementType maxdiff = 0;
   auto jacobi = [&](Index i, Index j) {
      B(i, j) = 0.25 *
         (A(i - 1, j) + A(i + 1, j) + A(i, j - 1) + A(i, j + 1));
      double diff = std::fabs(A(i, j) - B(i, j));
      if (diff > maxdiff) maxdiff = diff;
   };
   /* compute the border first which is sent in advance to our neighbors */
   for (Index i = 1; i + 1 < B.numRows; ++i) {
      jacobi(i, 1); jacobi(i, B.numCols-2);
   }
   for (Index j = 2; j + 2 < B.numCols; ++j) {
      jacobi(1, j); jacobi(B.numRows-2, j);
   }
   /* send border to our neighbors and initiate the receive operations */
   MPI_Request requests[8]; int ri = 0;
   for (auto& in_vector: in_vectors) {
      MPI_Irecv(in_vector.addr, 1, in_vector.datatype,
         in_vector.rank, 0, MPI_COMM_WORLD, &requests[ri++]);
   }
   for (auto& out_vector: out_vectors) {
      MPI_Isend(out_vector.addr, 1, out_vector.datatype,
         out_vector.rank, 0, MPI_COMM_WORLD, &requests[ri++]);
   }
   /* compute the inner block in parallel to the initiated communication */
   for (Index i = 2; i + 2 < B.numRows; ++i) {
      for (Index j = 2; j + 2 < B.numCols; ++j) {
         jacobi(i, j);
      }
   }
   /* wait for the initiated I/O to finish */
   for (auto& request: requests) {
      MPI_Status status;
      MPI_Wait(&request, &status);
   }
   return maxdiff;
}

int main(int argc, char** argv) {
   MPI_Init(&argc, &argv);
   int nof_processes; MPI_Comm_size(MPI_COMM_WORLD, &nof_processes);
   int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);

   using namespace hpc::matvec;
   using namespace hpc::mpi;
   using namespace hpc::aux;
   using Matrix = GeMatrix<double>;

   /* create two-dimensional Cartesian grid for our processes */
   int dims[2] = {0, 0}; int periods[2] = {false, false};
   MPI_Dims_create(nof_processes, 2, dims);
   MPI_Comm grid;
   MPI_Cart_create(MPI_COMM_WORLD,
      2,        // number of dimensions
      dims,     // actual dimensions
      periods,  // both dimensions are non-periodical
      true,     // reorder is permitted
      &grid     // newly created communication domain
   );
   MPI_Comm_rank(MPI_COMM_WORLD, &rank); // update rank (could have changed)

   /* initialize the entire matrix, including its borders */
   Matrix A(100, 100, StorageOrder::RowMajor);
   using Index = GeMatrix<double>::Index;
   if (rank == 0) {
      apply(A, [&](double& val, Index i, Index j) -> void {
         if (j == 0) {
            val = std::sin(PI * ((double)i/(A.numRows-1)));
         } else if (j == A.numCols - 1) {
            val = std::sin(PI * ((double)i/(A.numRows-1))) * E_POWER_MINUS_PI;
         } else {
            val = 0;
         }
      });
   }

   /* get our position within the grid */
   int coords[2]; MPI_Cart_coords(grid, rank, 2, coords);

   /* create matrices B1, B2 for our subarea */
   int overlap = 1;
   Slices<Index> rows(dims[0], A.numRows - 2*overlap);
   Slices<Index> columns(dims[1],
                         A.numCols - 2*overlap);
   Matrix B1(rows.size(coords[0]) + 2*overlap,
      columns.size(coords[1]) + 2*overlap,
      StorageOrder::RowMajor);
   Matrix B2(rows.size(coords[0]) + 2*overlap,
      columns.size(coords[1]) + 2*overlap,
      StorageOrder::RowMajor);

   /* distribute main body of A including left and right border */
   scatter_by_block(A, B1, 0, grid, dims, coords, overlap);
   copy(B1, B2); /* actually just the border needs to be copied */

   /* get the process numbers of our neighbors */
   int left, right, upper, lower;
   MPI_Cart_shift(grid, 0, 1, &upper, &lower);
   MPI_Cart_shift(grid, 1, 1, &left, &right);

   /* compute type for inner rows and cols without the border */
   auto B_inner_row = B1(0, 1, overlap, B1.numCols - 2*overlap);
   MPI_Datatype rowtype = get_type(B_inner_row);
   auto B_inner_col = B1(0, 1, B1.numRows - 2*overlap, overlap);
   MPI_Datatype coltype = get_type(B_inner_col);

   IOTransfer B1_in_vectors[] = {
      {left, &B1(1, 0), coltype},
      {upper, &B1(0, 1), rowtype},
      {right, &B1(1, B1.numCols-overlap), coltype},
      {lower, &B1(B1.numRows-overlap, 1), rowtype},
   };
   IOTransfer B1_out_vectors[] = {
      {left, &B1(1, 1), coltype},
      {upper, &B1(1, 1), rowtype},
      {right, &B1(1, B1.numCols-2*overlap), coltype},
      {lower, &B1(B1.numRows-2*overlap, 1), rowtype},
   };
   IOTransfer B2_in_vectors[] = {
      {left, &B2(1, 0), coltype},
      {upper, &B2(0, 1), rowtype},
      {right, &B2(1, B2.numCols-overlap), coltype},
      {lower, &B2(B2.numRows-overlap, 1), rowtype},
   };
   IOTransfer B2_out_vectors[] = {
      {left, &B2(1, 1), coltype},
      {upper, &B2(1, 1), rowtype},
      {right, &B2(1, B2.numCols-2*overlap), coltype},
      {lower, &B2(B2.numRows-2*overlap, 1), rowtype},
   };

   /* main loop for the Jacobi iterations */
   double eps = 1e-6; unsigned int iterations;
   for (iterations = 0; ; ++iterations) {
      double maxdiff = jacobi_iteration(B1, B2,
         B2_in_vectors, B2_out_vectors);
      maxdiff = jacobi_iteration(B2, B1,
         B1_in_vectors, B1_out_vectors);
      if (iterations % 10 == 0) {
         double global_max;
         MPI_Reduce(&maxdiff, &global_max, 1, get_type(maxdiff),
            MPI_MAX, 0, MPI_COMM_WORLD);
         MPI_Bcast(&global_max, 1, get_type(maxdiff), 0, MPI_COMM_WORLD);
         if (global_max < eps) break;
      }
   }
   if (rank == 0) fmt::printf("%d iterations\n", iterations);

   /* collect results */
   gather_by_block(B1, A, 0, grid, dims, coords, overlap);

   MPI_Finalize();

   /* generate JPG file with the result */
   if (rank == 0) {
      auto pixbuf = create_pixbuf(A, [](double val) -> HSVColor<double> {
         return HSVColor<double>((1-val) * 240, 1, 1);
      }, 8);
      gdk_pixbuf_save(pixbuf, "jacobi.jpg", "jpeg", nullptr,
         "quality", "100", nullptr);
   }
}
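
The convergence test in the main loop combines MPI_Reduce and MPI_Bcast so that every process learns the global maximum of the local differences. A single MPI_Allreduce expresses the same exchange. The following sketch is an alternative, not part of the listing above; the helper name converged is hypothetical, and get_type refers to the same overload for fundamental types that the listing already uses.

/* sketch of an alternative convergence test: one collective instead of
   MPI_Reduce followed by MPI_Bcast; every process receives the global
   maximum directly, so rank 0 plays no special role here */
template<typename T>
bool converged(T local_maxdiff, T eps, MPI_Comm comm) {
   T global_max;
   MPI_Allreduce(&local_maxdiff, &global_max, 1,
      hpc::mpi::get_type(local_maxdiff), MPI_MAX, comm);
   return global_max < eps;
}

Within the main loop, the Reduce/Bcast pair would then be replaced by a call such as if (converged(maxdiff, eps, MPI_COMM_WORLD)) break; whether this is faster than the two separate collectives depends on the MPI implementation, but it keeps the test symmetric across all processes.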