#include <cassert>
#include <cmath>
#include <iostream>
#include <sstream>
#include <mpi.h>
#include <flens/flens.h>

// M_E and M_PI are not part of ISO C++
#ifndef M_E
#define M_E 2.7182818284590452354
#endif
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

using namespace std;

// row-major full storage; the strides used for the border
// exchange below depend on this layout
typedef flens::GeMatrix<flens::FullStorage<double, flens::RowMajor> > Matrix;

// compute the partition [start, start + locallen) of an index
// range of length len for the process with the given rank
void get_partition(int len, int nofprocesses, int rank,
        int& start, int& locallen) {
    locallen = (len - rank - 1) / nofprocesses + 1;
    int share = len / nofprocesses;
    int remainder = len % nofprocesses;
    start = rank * share + (rank < remainder? rank: remainder);
}

// determine location and dimensions of the submatrix
// of the process with the given rank
void get_submatrix(const MPI_Comm& grid, int* dims, int n, int rank,
        int& first_row, int& first_col, int& nof_rows, int& nof_cols) {
    int coords[2];
    MPI_Cart_coords(grid, rank, 2, coords); // retrieve our position
    get_partition(n, dims[0], coords[0], first_row, nof_rows);
    get_partition(n, dims[1], coords[1], first_col, nof_cols);
}

void initialize_A(double& Aij, int i, int j, int N) {
    static const double E_POWER_MINUS_PI = pow(M_E, -M_PI);
    if (j == 0) {
        Aij = sin(M_PI * ((double)i/(N-1)));
    } else if (j == N-1) {
        Aij = sin(M_PI * ((double)i/(N-1))) * E_POWER_MINUS_PI;
    } else {
        Aij = 0;
    }
}

// single Jacobi iteration step: compute B from A, copy B back into A,
// and return the maximal difference between old and new values
double single_jacobi_iteration(Matrix& A, Matrix& B) {
    for (int i = B.firstRow(); i <= B.lastRow(); ++i) {
        for (int j = B.firstCol(); j <= B.lastCol(); ++j) {
            B(i,j) = 0.25 * (A(i-1,j) + A(i,j-1) + A(i,j+1) + A(i+1,j));
        }
    }
    double maxdiff = 0;
    for (int i = B.firstRow(); i <= B.lastRow(); ++i) {
        for (int j = B.firstCol(); j <= B.lastCol(); ++j) {
            double diff = fabs(A(i,j) - B(i,j));
            if (diff > maxdiff) maxdiff = diff;
            A(i,j) = B(i,j);
        }
    }
    return maxdiff;
}

// derived MPI datatype for a row or column of a matrix
MPI_Datatype vector_type(int len, int stride) {
    MPI_Datatype datatype;
    MPI_Type_vector(
        /* count = */ len,
        /* blocklength = */ 1,
        /* stride = */ stride,
        /* element type = */ MPI_DOUBLE,
        /* newly created type = */ &datatype);
    MPI_Type_commit(&datatype);
    return datatype;
}

// derived MPI datatype for a submatrix view
MPI_Datatype matrix_type(const Matrix::View& submatrix) {
    MPI_Datatype datatype;
    MPI_Type_vector(
        /* count = */ submatrix.numRows(),
        /* blocklength = */ submatrix.numCols(),
        /* stride = */ submatrix.engine().leadingDimension(),
        /* element type = */ MPI_DOUBLE,
        /* newly created type = */ &datatype);
    MPI_Type_commit(&datatype);
    return datatype;
}
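/*
 * Worked example for get_partition above: len = 10 and
 * nofprocesses = 3 yield the local lengths 4, 3, 3 with the start
 * indices 0, 4, and 7, i.e. the remainder of the division is spread
 * over the first processes.
 *
 * Each process extends its nof_rows x nof_cols submatrix B by a
 * one-element border in A: these ghost cells hold copies of the
 * adjacent border rows and columns of the neighbor processes (or the
 * fixed boundary values of the global matrix) and are refreshed after
 * every iteration by the border exchange below.
 */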
// 2D-partitioned Jacobi solver; collects the final N x N matrix in the
// process with rank 0 of the grid communicator and returns it there;
// all other processes return a null pointer
Matrix* run_jacobi_iteration(int rank, int nofprocesses, int N, double eps) {
    int n = N - 2; // dimension without the surrounding border

    // create two-dimensional Cartesian grid
    int dims[2] = {0, 0};
    int periods[2] = {false, false};
    MPI_Dims_create(nofprocesses, 2, dims);
    MPI_Comm grid;
    MPI_Cart_create(MPI_COMM_WORLD,
        2,        // number of dimensions
        dims,     // actual dimensions
        periods,  // both dimensions are non-periodical
        true,     // reorder is permitted
        &grid     // newly created communication domain
    );
    // update rank: as reordering is permitted, our rank within grid
    // may differ from our rank within MPI_COMM_WORLD
    MPI_Comm_rank(grid, &rank);

    // locate our own submatrix
    int first_row, nof_rows, first_col, nof_cols;
    get_submatrix(grid, dims, n, rank,
        first_row, first_col, nof_rows, nof_cols);
    Matrix A(nof_rows + 2, nof_cols + 2, first_row, first_col);
    Matrix B(nof_rows, nof_cols, first_row + 1, first_col + 1);
    for (int i = A.firstRow(); i <= A.lastRow(); ++i) {
        for (int j = A.firstCol(); j <= A.lastCol(); ++j) {
            initialize_A(A(i, j), i, j, N);
        }
    }

    // create the associated vector views: in_vectors address the ghost
    // rows and columns of A, out_vectors the border rows and columns of B
    struct buffer { double* buf; MPI_Datatype type; };
    struct buffer in_vectors[] = {
        {&A(A.firstRow(), A.firstCol() + 1), vector_type(nof_cols, 1)},
        {&A(A.lastRow(), A.firstCol() + 1), vector_type(nof_cols, 1)},
        {&A(A.firstRow() + 1, A.firstCol()), vector_type(nof_rows, nof_cols + 2)},
        {&A(A.firstRow() + 1, A.lastCol()), vector_type(nof_rows, nof_cols + 2)}
    };
    struct buffer out_vectors[] = {
        {&B(B.lastRow(), B.firstCol()), vector_type(nof_cols, 1)},
        {&B(B.firstRow(), B.firstCol()), vector_type(nof_cols, 1)},
        {&B(B.firstRow(), B.lastCol()), vector_type(nof_rows, nof_cols)},
        {&B(B.firstRow(), B.firstCol()), vector_type(nof_rows, nof_cols)}
    };

    // get the process numbers of our neighbors
    int left, right, upper, lower;
    MPI_Cart_shift(grid, 0, 1, &upper, &lower);
    MPI_Cart_shift(grid, 1, 1, &left, &right);
    int in_neighbor[] = {upper, lower, left, right};
    int out_neighbor[] = {lower, upper, right, left};

    for (;;) {
        double maxdiff = single_jacobi_iteration(A, B);
        // agree globally upon the maximal difference
        // (MPI_Allreduce would combine both steps)
        double global_max;
        MPI_Reduce(&maxdiff, &global_max, 1, MPI_DOUBLE, MPI_MAX, 0, grid);
        MPI_Bcast(&global_max, 1, MPI_DOUBLE, 0, grid);
        if (global_max < eps) break;

        // exchange borders with our neighbors; this must use grid as
        // the ranks delivered by MPI_Cart_shift refer to grid
        for (int dir = 0; dir < 4; ++dir) {
            MPI_Status status;
            MPI_Sendrecv(
                out_vectors[dir].buf, 1, out_vectors[dir].type,
                    out_neighbor[dir], 0,
                in_vectors[dir].buf, 1, in_vectors[dir].type,
                    in_neighbor[dir], 0,
                grid, &status
            );
        }
    }
    // release the derived datatypes of the border views
    for (int dir = 0; dir < 4; ++dir) {
        MPI_Type_free(&in_vectors[dir].type);
        MPI_Type_free(&out_vectors[dir].type);
    }

    // collect results in the process with grid rank 0
    Matrix* Result = 0;
    if (rank == 0) {
        Result = new Matrix(N, N, 0, 0);
        assert(Result);
        for (int i = 0; i < N; ++i) {
            for (int j = 0; j < N; ++j) {
                initialize_A((*Result)(i,j), i, j, N);
            }
        }
        for (int p = 0; p < nofprocesses; ++p) {
            int first_row, first_col, nof_rows, nof_cols;
            get_submatrix(grid, dims, n, p,
                first_row, first_col, nof_rows, nof_cols);
            ++first_row; ++first_col; // skip the surrounding border
            Matrix::View submatrix(Result->engine().view(
                first_row, first_col,
                first_row + nof_rows - 1, first_col + nof_cols - 1,
                first_row, first_col));
            if (p == 0) {
                submatrix = B; // our own share needs no communication
            } else {
                MPI_Datatype type = matrix_type(submatrix);
                MPI_Status status;
                MPI_Recv(
                    &submatrix(submatrix.firstRow(), submatrix.firstCol()),
                    1, type, p, 0, grid, &status);
                MPI_Type_free(&type);
            }
        }
    } else {
        // B is stored contiguously, hence a plain MPI_DOUBLE send suffices
        MPI_Send(&B(B.firstRow(), B.firstCol()),
            B.numRows() * B.numCols(), MPI_DOUBLE, 0, 0, grid);
    }
    return Result;
}

char* cmdname;

void usage() {
    cerr << "Usage: " << cmdname << " [N [eps]]" << endl;
    MPI_Abort(MPI_COMM_WORLD, 1);
}

int main(int argc, char** argv) {
    int N = 10;
    double eps = 1e-6;

    MPI_Init(&argc, &argv);
    int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    int nofprocesses; MPI_Comm_size(MPI_COMM_WORLD, &nofprocesses);

    // the command line is examined by process 0 only;
    // the parameters are broadcast afterwards
    if (rank == 0) {
        cmdname = *argv++; --argc;
        if (argc > 2) usage();
        if (argc > 0) {
            istringstream arg(*argv++); --argc;
            if (!(arg >> N) || N < 3 || nofprocesses > (N-2)*(N-2)) usage();
        }
        if (argc > 0) {
            istringstream arg(*argv++); --argc;
            if (!(arg >> eps) || eps <= 0) usage();
        }
    }
    MPI_Bcast(&N, 1, MPI_INT, 0, MPI_COMM_WORLD);
    MPI_Bcast(&eps, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

    Matrix* A = run_jacobi_iteration(rank, nofprocesses, N, eps);
    // just the collecting process receives a non-null pointer; due to
    // the possible reordering within the grid communicator this is not
    // necessarily the process with rank 0 in MPI_COMM_WORLD
    if (A) {
        cout << N << endl;
        for (int i = 0; i < N; ++i) {
            for (int j = 0; j < N; ++j) {
                cout << " " << (*A)(i,j);
            }
            cout << endl;
        }
        delete A;
    }
    MPI_Finalize();
}
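/*
 * Possible build and run, as a sketch only (the compiler wrappers and
 * the FLENS include path are assumptions, not part of this source):
 *
 *    mpic++ -O2 -o jacobi jacobi.cpp
 *    mpirun -np 4 ./jacobi 100 1e-6
 *
 * The optional arguments are N, the dimension of the N x N grid
 * including the fixed border, and eps, the termination threshold for
 * the globally maximal difference between two iterations.
 */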