Lösungsvorschlag

#include <cstdio>
#include <hpc/matvec/gematrix.h>
#include <hpc/matvec/apply.h>
#include <hpc/matvec/print.h>
#include <hpc/matvec/asum.h>
#include <hpc/aux/slices.h>
#include <hpc/mt/thread_pool.h>
#include <hpc/aux/repool.h>

template<typename MA, typename Pool>
typename std::enable_if<hpc::matvec::IsRealGeMatrix<MA>::value, void>::type
randomInit(MA& A, Pool& pool) {
   using ElementType = typename MA::ElementType;
   using Index = typename MA::Index;
   using EngineType = typename Pool::EngineType;

   std::uniform_real_distribution<double> uniform(-100, 100);
   hpc::aux::RandomEngineGuard<EngineType> guard(pool);
   auto& engine(guard.get());

   hpc::matvec::apply(A, [&](ElementType& val, Index i, Index j) -> void {
      val = uniform(engine);
   });
}

template<typename MA, typename RandomEnginePool>
typename std::enable_if<hpc::matvec::IsRealGeMatrix<MA>::value, void>::type
randomInit(MA& A, RandomEnginePool& repool, hpc::mt::ThreadPool& tpool,
      typename MA::Index maxRows,
      typename MA::Index maxCols) {
   using ElementType = typename MA::ElementType;
   using Index = typename MA::Index;
   using namespace hpc::aux;

   Index nof_row_slices = A.numRows / maxRows;
   if (A.numRows % maxRows) ++nof_row_slices;
   Index nof_col_slices = A.numCols / maxCols;
   if (A.numCols % maxCols) ++nof_col_slices;

   std::vector<std::future<void>> futures(nof_row_slices * nof_col_slices);

   Index job_index = 0;
   foreach_slice(nof_row_slices, A.numRows, [&]
         (Index rows_offset, Index rows_size) {
      foreach_slice(nof_col_slices, A.numCols, [&,rows_offset,rows_size]
            (Index cols_offset, Index cols_size) {
         auto A_ = A(rows_offset, cols_offset, rows_size, cols_size);
         futures[job_index++] = tpool.submit([=, &repool]() mutable {
            randomInit(A_, repool);
         });
      });
   });

   for (auto& future: futures) {
      future.get();
   }
}

template<typename MA>
auto asum(MA& A, hpc::mt::ThreadPool& tpool,
      typename MA::Index maxRows,
      typename MA::Index maxCols) -> decltype(hpc::matvec::asum(A)) {
   using ElementType = typename MA::ElementType;
   using Index = typename MA::Index;
   using Result = decltype(asum(A));
   using namespace hpc::matvec;
   using namespace hpc::aux;

   Index nof_row_slices = A.numRows / maxRows;
   if (A.numRows % maxRows) ++nof_row_slices;
   Index nof_col_slices = A.numCols / maxCols;
   if (A.numCols % maxCols) ++nof_col_slices;

   std::vector<std::future<Result>> futures(nof_row_slices * nof_col_slices);

   Index job_index = 0;
   foreach_slice(nof_row_slices, A.numRows, [&]
         (Index rows_offset, Index rows_size) {
      foreach_slice(nof_col_slices, A.numCols, [&,rows_offset,rows_size]
            (Index cols_offset, Index cols_size) {
         auto A_ = A(rows_offset, cols_offset, rows_size, cols_size);
         futures[job_index++] = tpool.submit([=]() -> Result {
            return asum(A_);
         });
      });
   });

   Result sum(0);
   for (auto& future: futures) {
      sum += future.get();
   }
   return sum;
}

int main() {
   using namespace hpc::matvec;
   using namespace hpc::aux;
   using namespace hpc::mt;
   unsigned int nof_threads = std::thread::hardware_concurrency();
   RandomEnginePool<std::mt19937> repool(nof_threads);
   ThreadPool tpool(nof_threads);

   GeMatrix<double> A(51, 7);
   randomInit(A, repool, tpool, 8, 8);
   auto sum = asum(A, tpool, 8, 8);
   std::printf("asum (mt) = %lg\n", sum);
   std::printf("asum = %lg\n", asum(A));
}