#include #include #include #include #include #include #include #include #include #include #include #include template struct RandomEnginePool { using EngineType = T; RandomEnginePool(std::size_t size) : size(size), nof_free_engines(size), inuse(size), engines(size) { std::random_device r; for (std::size_t i = 0; i < size; ++i) { engines[i].seed(r()); inuse[i] = false; } } T& get() { std::unique_lock lock(mutex); if (nof_free_engines == 0) { cv.wait(lock); } for (std::size_t i = 0; i < size; ++i) { if (!inuse[i]) { inuse[i] = true; --nof_free_engines; return engines[i]; } } assert(false); } void free(T& engine) { { std::unique_lock lock(mutex); bool found = false; for (std::size_t i = 0; i < size; ++i) { if (&engine == &engines[i]) { inuse[i] = false; ++nof_free_engines; found = true; break; } } assert(found); } cv.notify_one(); } private: std::mutex mutex; std::condition_variable cv; std::size_t size; std::size_t nof_free_engines; std::vector inuse; std::vector engines; }; template struct RandomEngineGuard { using EngineType = T; RandomEngineGuard(RandomEnginePool& pool) : pool(pool), engine(pool.get()) { } ~RandomEngineGuard() { pool.free(engine); } T& get() { return engine; } RandomEnginePool& pool; T& engine; }; template typename std::enable_if::value, void>::type randomInit(MA& A, Pool& pool) { using ElementType = typename MA::ElementType; using Index = typename MA::Index; using EngineType = typename Pool::EngineType; std::uniform_real_distribution uniform(-100, 100); RandomEngineGuard guard(pool); auto& engine(guard.get()); hpc::matvec::apply(A, [&](ElementType& val, Index i, Index j) -> void { val = uniform(engine); }); } struct ThreadPool { public: using Job = std::function; ThreadPool(unsigned int nof_threads) : nof_threads(nof_threads), active(0), finished(false), threads(nof_threads) { for (auto& t: threads) { t = std::thread([=]() { process_jobs(); }); } } ~ThreadPool() { { std::unique_lock lock(mutex); finished = true; } cv.notify_all(); for (auto& t: threads) { t.join(); } } void submit(Job job) { std::unique_lock lock(mutex); jobs.push_back(std::move(job)); cv.notify_one(); } private: unsigned int nof_threads; unsigned int active; bool finished; std::vector threads; std::mutex mutex; std::condition_variable cv; std::list jobs; void process_jobs() { for(;;) { Job job; /* fetch job */ { std::unique_lock lock(mutex); while (jobs.empty() && (active > 0 || !finished)) { cv.wait(lock); } if (jobs.empty() && active == 0 && finished) break; job = std::move(jobs.front()); jobs.pop_front(); ++active; } /* execute job */ job(); { std::unique_lock lock(mutex); --active; } } /* if one thread finishes, all others have to finish as well */ cv.notify_all(); } }; struct JobStatus { public: JobStatus() : finished(false) { } void done() { std::unique_lock lock(mutex); assert(!finished); finished = true; cv.notify_all(); } void wait() { std::unique_lock lock(mutex); if (!finished) { cv.wait(lock); } } private: std::mutex mutex; std::condition_variable cv; bool finished; }; template typename std::enable_if::value, void>::type randomInit(MA& A, RandomEnginePool& repool, ThreadPool& tpool, typename MA::Index maxRows, typename MA::Index maxCols) { using ElementType = typename MA::ElementType; using Index = typename MA::Index; using namespace hpc::aux; Index nof_row_slices = A.numRows / maxRows; if (A.numRows % maxRows) ++nof_row_slices; Index nof_col_slices = A.numCols / maxCols; if (A.numCols % maxCols) ++nof_col_slices; std::vector job_status(nof_row_slices * nof_col_slices); Index job_index = 0; foreach_slice(nof_row_slices, A.numRows, [&] (Index rows_offset, Index rows_size) { foreach_slice(nof_col_slices, A.numCols, [&,rows_offset,rows_size] (Index cols_offset, Index cols_size) { auto A_ = A(rows_offset, cols_offset, rows_size, cols_size); tpool.submit([=, &job_status, &repool]() mutable { randomInit(A_, repool); job_status[job_index].done(); }); ++job_index; }); }); for (auto& js: job_status) { js.wait(); } } int main() { using namespace hpc::matvec; using namespace hpc::aux; unsigned int nof_threads = std::thread::hardware_concurrency(); RandomEnginePool repool(nof_threads); ThreadPool tpool(nof_threads); GeMatrix A(51, 7); randomInit(A, repool, tpool, 8, 8); print(A, "A"); }