#include #include #include #include #include #include #include template typename std::enable_if::value, void>::type randomInit(MA& A) { using ElementType = typename MA::ElementType; using Index = typename MA::Index; std::random_device random; std::mt19937 mt(random()); std::uniform_real_distribution uniform(-100,100); hpc::matvec::apply(A, [&](ElementType& val, Index i, Index j) -> void { val = uniform(mt); }); } volatile double global; int main() { using namespace hpc::matvec; using namespace hpc::aux; unsigned int nof_threads = std::thread::hardware_concurrency(); WallTime wall_time; constexpr unsigned int N = 5005; wall_time.tic(); { GeMatrix A(N, N); std::vector threads(nof_threads); unsigned int job_size = A.numRows / nof_threads; unsigned int remainder = A.numRows % nof_threads; GeMatrix::Index start = 0; for (int index = 0; index < nof_threads; ++index) { GeMatrix::Index num_rows = job_size; if (index < remainder) ++num_rows; auto A_ = A(start, 0, num_rows, A.numCols); threads[index] = std::thread([=]() mutable { randomInit(A_); }); start += num_rows; } for (int index = 0; index < nof_threads; ++index) { threads[index].join(); } global = A(7, 2); } double t1 = wall_time.toc(); wall_time.tic(); { GeMatrix A(N, N); std::vector threads(nof_threads); unsigned int job_size = A.numRows / nof_threads; unsigned int remainder = A.numRows % nof_threads; for (int index = 0; index < nof_threads; ++index) { GeMatrix::Index num_rows = job_size; if (index < remainder) ++num_rows; auto A_ = GeMatrixView(num_rows, A.numCols, &A(index, 0), A.incRow * nof_threads, A.incCol); threads[index] = std::thread([=]() mutable { randomInit(A_); }); } for (int index = 0; index < nof_threads; ++index) { threads[index].join(); } global = A(7, 2); } double t2 = wall_time.toc(); printf("t1 = %4.3lf, t2 = %4.3lf\n", t1, t2); }