Simple Jacobi solver for the GPU
Content |
We revisit now the Jacobi solver for the GPU. We start with a very simple approach where we operate with one block on the GPU only. This easies synchronization as the threads of a block can easily use __sync_threads() to keep in sync.
For reasons of simplicity we work initially with a fixed number of iterations. We will see later how this can be improved.
Exercise
Develop a kernel function for the Jacobi solver that operates on one block only. The number of iterations is to be passed as parameter. Each thread shall operate only on one \(A_{i,j}\).
Try to solve this with one matrix only. This can indeed be done with proper synchronization. Make sure that the kernel operates on the inner part of \(A\) only.
Think about you access the matrix \(A\) within the kernel function. Which approach is more cache-friendly for the GPU? Or, alternatively, try both variants of matrix storage organisation. Compare the times of both variants.
You can simply profile your application using the nvprof utility, i.e. invoke nvprof ./jacobi1 instead of simply ./jacobi1. Look for the first section titled GPU activities where you will find void jacobi... – interesting is the total time and the average time (Avg).
Explain the difference. Consider that within a warp threadIdx.y is identical for all threads but the values of threadIdx.x are numbered consecutively.
The lecture library is available at /home/numerik/pub/pp/ss19/lib.
Skeleton
#include <cassert> #include <cmath> #include <printf.hpp> #include <hpc/aux/hsvcolor.hpp> #include <hpc/cuda/check.hpp> #include <hpc/cuda/copy.hpp> #include <hpc/cuda/properties.hpp> #include <hpc/matvec/for-all-indices.hpp> #include <hpc/matvec/gematrix.hpp> #include <hpc/matvec/matrix2pixbuf.hpp> template<typename T> const T PI = std::acos(T(-1.0)); template<typename T> const T E = std::exp(T(1.0)); template<typename T> const T E_POWER_MINUS_PI = std::pow(E<T>, -PI<T>); using namespace hpc; template< template<typename> class Matrix, typename T, Require< DeviceGe<Matrix<T>>, DeviceView<Matrix<T>> > = true > __global__ void jacobi(Matrix<T> A, unsigned int nofiterations) { /* to be done */ } template<typename T> constexpr T int_sqrt(T n) { T result = 1; while (result * result <= n) { ++result; } return result - 1; } int main(int argc, char** argv) { using namespace hpc::aux; using namespace hpc::cuda; using namespace hpc::matvec; using T = double; std::size_t max_threads = get_max_threads_per_block(); std::size_t N = int_sqrt(max_threads); GeMatrix<T> A(N + 2, N + 2, Order::RowMajor); /* initialize the entire matrix, including its borders */ for_all_indices(A, [&](std::size_t i, std::size_t j) -> void { if (j == 0) { A(i, j) = std::sin(PI<T> * (T(i)/(A.numRows()-1))); } else if (j == A.numCols() - 1) { A(i, j) = std::sin(PI<T> * (T(i)/(A.numRows()-1))) * E_POWER_MINUS_PI<T>; } else { A(i, j) = 0; } }); DeviceGeMatrix<T> devA(A.numRows(), A.numCols(), Order::RowMajor); copy(A, devA); dim3 block(N, N); jacobi<<<1, block>>>(devA.view(), 520); copy(devA, A); auto pixbuf = create_pixbuf(A, [](T val) -> HSVColor<T> { return HSVColor<T>((1-val) * 240, 1, 1); }, 8); gdk_pixbuf_save(pixbuf, "jacobi.jpg", "jpeg", nullptr, "quality", "100", nullptr); }
Makefile
CudaSources := $(wildcard *.cu) CudaObjects := $(patsubst %.cu,%.o,$(CudaSources)) Targets := $(patsubst %.cu,%,$(CudaSources)) STD := -std=c++14 CPPFLAGS := $(STD) -I/home/numerik/pub/pp/ss19/lib NVCCFLAGS := NVCC := nvcc CXX := $(NVCC) CC := $(NVCC) CXXFLAGS := $(shell pkg-config --cflags gdk-pixbuf-2.0 | sed 's/-pthread //') LDLIBS := $(shell pkg-config --libs gdk-pixbuf-2.0 | sed 's/-pthread //; s/-Wl/-Xlinker /g') .PHONY: all all: $(Targets) $(CudaObjects): %.o: %.cu $(NVCC) -c $(CPPFLAGS) $(CXXFLAGS) $(NVCCFLAGS) $< $(Targets): %: %.o $(NVCC) -w -o $@ $(STD) $(NVCCFLAGS) $< $(LDLIBS) GCCSources := $(patsubst %,-x c++ %,$(CudaSources)) .PHONY: depend depend: gcc-makedepend $(CPPFLAGS) -D__CUDACC__ $(GCCSources) .PHONY: clean clean: rm -f $(Targets) $(CudaObjects) # DO NOT DELETE jacobi1-colmaj.o: jacobi1-colmaj.cu \ /home/numerik/pub/pp/ss19/lib/hpc/aux/hsvcolor.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/aux/rgbcolor.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/hostdev.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/check.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/copy.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/gematrix.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/densevector.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/mixin.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/buffer.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/aux/cache-line.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/views.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/traits.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/traits.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/views.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/storage/mixin.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/tools/buffer.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/gematrix.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/densevector.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/properties.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/for-all-indices.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/matrix2pixbuf.hpp jacobi0.o: jacobi0.cu /home/numerik/pub/pp/ss19/lib/hpc/aux/hsvcolor.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/aux/rgbcolor.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/hostdev.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/check.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/copy.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/gematrix.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/densevector.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/mixin.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/buffer.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/aux/cache-line.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/views.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/traits.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/traits.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/views.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/storage/mixin.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/tools/buffer.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/gematrix.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/densevector.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/properties.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/for-all-indices.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/matrix2pixbuf.hpp jacobi1-rowmaj.o: jacobi1-rowmaj.cu \ /home/numerik/pub/pp/ss19/lib/hpc/aux/hsvcolor.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/aux/rgbcolor.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/hostdev.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/check.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/copy.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/gematrix.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/densevector.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/mixin.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/buffer.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/aux/cache-line.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/views.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/traits.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/traits.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/views.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/storage/mixin.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/tools/buffer.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/gematrix.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/densevector.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/properties.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/for-all-indices.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/matrix2pixbuf.hpp jacobi3.o: jacobi3.cu /home/numerik/pub/pp/ss19/lib/hpc/aux/hsvcolor.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/aux/rgbcolor.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/hostdev.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/check.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/copy.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/gematrix.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/densevector.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/mixin.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/buffer.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/aux/cache-line.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/views.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/traits.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/traits.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/views.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/storage/mixin.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/tools/buffer.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/gematrix.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/densevector.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/properties.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/matrix2pixbuf.hpp jacobi2.o: jacobi2.cu /home/numerik/pub/pp/ss19/lib/hpc/aux/hsvcolor.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/aux/rgbcolor.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/hostdev.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/check.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/copy.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/gematrix.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/densevector.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/mixin.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/buffer.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/aux/cache-line.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/views.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/traits.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/traits.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/views.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/storage/mixin.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/tools/buffer.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/gematrix.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/densevector.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/cuda/properties.hpp \ /home/numerik/pub/pp/ss19/lib/hpc/matvec/matrix2pixbuf.hpp