Sample solution

Content

#include <cmath>
#include <cstddef>
#include <cstdio>
#include <memory>
#include <gdk-pixbuf/gdk-pixbuf.h>
#include <printf.hpp>
#include <hpc/aux/hsvcolor.hpp>
#include <hpc/aux/rgbcolor.hpp>
#include <hpc/cuda/check.hpp>
#include <hpc/cuda/copy.hpp>
#include <hpc/cuda/gematrix.hpp>
#include <hpc/cuda/properties.hpp>
#include <hpc/matvec/gematrix.hpp>
#include <hpc/matvec/matrix2pixbuf.hpp>

using T = double;

using namespace hpc;

/*
 * Kernel: fill a device matrix view with the boundary values used by the
 * Jacobi sample.
 *
 * Launch layout: 2D grid of 2D blocks; the thread's global x index selects
 * the row i and its global y index selects the column j. Threads that fall
 * outside the matrix do nothing, so the grid may overshoot the matrix.
 *
 * Values written:
 *   column 0:         sin(pi * i / numRows)
 *   last column:      sin(pi * i / numRows) * exp(-pi)
 *   any other column: 0
 * If the matrix has a single column, the "last column" rule applies.
 *
 * NOTE(review): assumes the view exposes a device-callable numCols()
 * alongside numRows() -- confirm against hpc/cuda/gematrix.hpp.
 */
template<
   template<typename> class Matrix,
   typename T,
   Require< DeviceGe<Matrix<T>>, DeviceView<Matrix<T>> > = true
>
__global__ void init_matrix(Matrix<T> A) {
   // widen before multiplying so the index cannot wrap in 32-bit arithmetic
   const std::size_t i =
      static_cast<std::size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
   const std::size_t j =
      static_cast<std::size_t>(blockIdx.y) * blockDim.y + threadIdx.y;

   const std::size_t num_rows = A.numRows();
   const std::size_t num_cols = A.numCols();

   // guard: the grid need not divide the matrix dimensions evenly
   if (i >= num_rows || j >= num_cols) {
      return;
   }

   // derive pi in the element type instead of relying on the
   // non-standard M_PI macro
   const T PI = std::acos(-T(1.0));

   T value = 0;
   const bool last_col = (j == num_cols - 1);
   if (last_col || j == 0) {
      value = std::sin(PI * (static_cast<T>(i) / num_rows));
      if (last_col) {
         // damping factor e^(-pi) on the far boundary
         value *= std::exp(-PI);
      }
   }
   A(i, j) = value;
}

/*
 * Integer square root: returns the largest r with r * r <= n.
 * Returns 0 for n < 1.
 *
 * The loop condition is written as a division (r <= n / r) rather than
 * a multiplication (r * r <= n) so that it cannot overflow when n is
 * close to the maximum value of T.
 */
template<typename T>
constexpr T int_sqrt(T n) {
   T result = 1;
   // result >= 1 throughout, so the division is always well-defined
   while (result <= n / result) {
      ++result;
   }
   return result - 1;
}

/*
 * Initializes an N x N matrix on the GPU with init_matrix, copies it back
 * to the host and stores a color-coded rendering of it as "jacobi.jpg".
 *
 * N is chosen as GRID_DIM times the edge length of the largest square
 * thread block the device permits, so the launch covers the matrix exactly.
 *
 * Returns 0 on success and 1 if the kernel launch or writing the image
 * fails.
 */
int main() {
   using namespace hpc::matvec;
   using namespace hpc::cuda;
   using namespace hpc::aux;

   // largest BLOCK_DIM with BLOCK_DIM * BLOCK_DIM <= max threads per block
   const std::size_t max_threads = get_max_threads_per_block();
   const std::size_t BLOCK_DIM = int_sqrt(max_threads);
   const std::size_t GRID_DIM = 2;
   const std::size_t N = BLOCK_DIM * GRID_DIM;

   GeMatrix<T> A(N, N, Order::ColMajor);
   DeviceGeMatrix<T> devA(A.numRows(), A.numCols(), Order::ColMajor);
   // NOTE(review): the kernel writes to every element of devA, so this
   // host-to-device copy looks redundant -- kept to preserve the
   // transfer pattern of the sample
   copy(A, devA);
   dim3 block_dim(BLOCK_DIM, BLOCK_DIM);
   dim3 grid_dim(GRID_DIM, GRID_DIM);
   init_matrix<<<grid_dim, block_dim>>>(devA.view());

   // a kernel launch returns no status; an invalid launch configuration
   // is only visible through cudaGetLastError()
   const cudaError_t launch_status = cudaGetLastError();
   if (launch_status != cudaSuccess) {
      std::fprintf(stderr, "init_matrix launch failed: %s\n",
         cudaGetErrorString(launch_status));
      return 1;
   }
   copy(devA, A);

   // map each value to a hue: val == 0 gives 240, val == 1 gives 0
   // NOTE(review): the trailing 8 is presumably a per-element scaling
   // factor for the image -- confirm in hpc/matvec/matrix2pixbuf.hpp
   auto pixbuf = create_pixbuf(A, [](T val) -> HSVColor<T> {
      return HSVColor<T>((1-val) * 240, 1, 1);
   }, 8);
   if (!pixbuf) {
      std::fprintf(stderr, "unable to create pixbuf\n");
      return 1;
   }

   GError* error = nullptr;
   const gboolean saved = gdk_pixbuf_save(pixbuf, "jacobi.jpg", "jpeg",
      &error, "quality", "100", nullptr);
   // release the pixbuf regardless of whether saving succeeded
   g_object_unref(pixbuf);
   if (!saved) {
      std::fprintf(stderr, "unable to save jacobi.jpg: %s\n",
         error ? error->message : "unknown error");
      if (error) {
         g_error_free(error);
      }
      return 1;
   }
   return 0;
}
livingstone$ cp /home/numerik/pub/hpc/ws19/session29/misc/Makefile .
livingstone$ make depend
gcc-makedepend -std=c++14 -I/home/numerik/pub/hpc/ws19/session29 -D__CUDACC__ -x c++ jacobi3.cu
livingstone$ make jacobi3
nvcc -c -std=c++14 -I/home/numerik/pub/hpc/ws19/session29 -I/usr/include/gdk-pixbuf-2.0 -I/usr/include/libpng16 -I/usr/include/glib-2.0 -I/usr/lib/x86_64-linux-gnu/glib-2.0/include  jacobi3.cu
nvcc -w -o jacobi3 -std=c++14  jacobi3.o -lgdk_pixbuf-2.0 -lgobject-2.0 -lglib-2.0
livingstone$ nvprof ./jacobi3
==24673== NVPROF is profiling process 24673, command: ./jacobi3
==24673== Profiling application: ./jacobi3
==24673== Profiling result:
            Type  Time(%)      Time     Calls       Avg       Min       Max  Name
 GPU activities:   55.65%  18.752us         1  18.752us  18.752us  18.752us  [CUDA memcpy DtoH]
                   32.38%  10.912us         1  10.912us  10.912us  10.912us  [CUDA memcpy HtoD]
                   11.97%  4.0320us         1  4.0320us  4.0320us  4.0320us  void init_matrix<hpc::cuda::DeviceGeMatrixView, double, bool=1>(hpc<double>::cuda::DeviceGeMatrixView)
      API calls:   98.94%  244.98ms         1  244.98ms  244.98ms  244.98ms  cudaMalloc
                    0.41%  1.0184ms        97  10.498us  1.9560us  362.89us  cuDeviceGetAttribute
                    0.33%  805.18us         1  805.18us  805.18us  805.18us  cudaGetDeviceProperties
                    0.11%  280.82us         1  280.82us  280.82us  280.82us  cudaFree
                    0.10%  248.91us         1  248.91us  248.91us  248.91us  cuDeviceTotalMem
                    0.04%  99.102us         1  99.102us  99.102us  99.102us  cuDeviceGetName
                    0.04%  94.982us         2  47.491us  36.805us  58.177us  cudaMemcpy
                    0.02%  40.157us         1  40.157us  40.157us  40.157us  cudaLaunchKernel
                    0.01%  13.200us         1  13.200us  13.200us  13.200us  cuDeviceGetPCIBusId
                    0.01%  13.130us         1  13.130us  13.130us  13.130us  cudaGetDevice
                    0.00%  9.7070us         3  3.2350us  2.4440us  4.4690us  cuDeviceGetCount
                    0.00%  5.4470us         2  2.7230us  2.4440us  3.0030us  cuDeviceGet
                    0.00%  2.5840us         1  2.5840us  2.5840us  2.5840us  cuDeviceGetUuid
livingstone$