#ifndef HPC_CUDA_GEMATRIX_MAPPED_H
#define HPC_CUDA_GEMATRIX_MAPPED_H 1
/* vim: set sw=4: */
#include <cassert>
#include <cstdlib>
#include <hpc/cuda/check.h>
#include <hpc/cuda/densevector.h>
#include <hpc/cuda/gematrix.h>
#include <hpc/matvec/gematrix.h>
namespace hpc { namespace cuda {
template <typename T, typename I=std::size_t>
struct MappedGeMatrix
{
typedef T ElementType;
typedef I Index;
typedef GeMatrix<T,Index> NoView;
typedef GeMatrixConstView<T,Index> DeviceConstView;
typedef GeMatrixView<T,Index> DeviceView;
typedef hpc::matvec::GeMatrixConstView<T,Index> HostConstView;
typedef hpc::matvec::GeMatrixView<T,Index> HostView;
typedef DenseVectorConstView<T,Index> DeviceConstVectorView;
typedef DenseVectorView<T,Index> DeviceVectorView;
typedef hpc::matvec::DenseVectorConstView<T,Index> HostConstVectorView;
typedef hpc::matvec::DenseVectorView<T,Index> HostVectorView;
MappedGeMatrix(Index numRows, Index numCols,
StorageOrder order=ColMajor)
: numRows(numRows), numCols(numCols),
incRow(order==ColMajor ? 1: numCols),
incCol(order==RowMajor ? 1: numRows)
{
CHECK_CUDA(cudaHostAlloc, (void**)&host_data,
numRows*numCols * sizeof(T), cudaHostAllocMapped);
CHECK_CUDA(cudaHostGetDevicePointer, &device_data,
host_data, 0);
}
~MappedGeMatrix()
{
CHECK_CUDA(cudaFreeHost, host_data);
}
__device__ __host__
const ElementType &
operator()(Index i, Index j) const
{
assert(i<numRows && j<numCols);
# ifdef __CUDA_ARCH__
return device_data[i*incRow + j*incCol];
# else
return host_data[i*incRow + j*incCol];
# endif
}
__device__ __host__
ElementType &
operator()(Index i, Index j)
{
assert(i<numRows && j<numCols);
# ifdef __CUDA_ARCH__
return device_data[i*incRow + j*incCol];
# else
return host_data[i*incRow + j*incCol];
# endif
}
#ifdef __CUDA_ARCH__
__device__
DeviceConstView
operator()(Index i, Index j, Index m, Index n) const
{
assert(i+m<=numRows);
assert(j+n<=numCols);
return DeviceConstView(m, n, &(operator()(i,j)), incRow, incCol);
}
#endif
#ifndef __CUDA_ARCH__
HostConstView
operator()(Index i, Index j, Index m, Index n) const
{
assert(i+m<=numRows);
assert(j+n<=numCols);
return HostConstView(m, n, &(operator()(i,j)), incRow, incCol);
}
#endif
#ifdef __CUDA_ARCH__
__device__
DeviceView
operator()(Index i, Index j, Index m, Index n)
{
assert(i+m<=numRows);
assert(j+n<=numCols);
return DeviceView(m, n, &(operator()(i,j)), incRow, incCol);
}
#endif
#ifndef __CUDA_ARCH__
HostView
operator()(Index i, Index j, Index m, Index n)
{
assert(i+m<=numRows);
assert(j+n<=numCols);
return HostView(m, n, &(operator()(i,j)), incRow, incCol);
}
#endif
#ifdef __CUDA_ARCH__
__device__
DeviceConstVectorView
row(Index i) const
{
return DeviceConstVectorView(numCols, &(operator()(i,0)), incCol);
}
#endif
#ifndef __CUDA_ARCH__
HostConstVectorView
row(Index i) const
{
return HostConstVectorView(numCols, &(operator()(i,0)), incCol);
}
#endif
#ifdef __CUDA_ARCH__
__device__
DeviceVectorView
row(Index i)
{
return DeviceVectorView(numCols, &(operator()(i,0)), incCol);
}
#endif
#ifndef __CUDA_ARCH__
HostVectorView
row(Index i)
{
return HostVectorView(numCols, &(operator()(i,0)), incCol);
}
#endif
#ifdef __CUDA_ARCH__
__device__
DeviceConstVectorView
col(Index j) const
{
return DeviceConstVectorView(numRows, &(operator()(0,j)), incRow);
}
#endif
#ifndef __CUDA_ARCH__
HostConstVectorView
col(Index j) const
{
return HostConstVectorView(numRows, &(operator()(0,j)), incRow);
}
#endif
#ifdef __CUDA_ARCH__
__device__
DeviceVectorView
col(Index j)
{
return DeviceVectorView(numRows, &(operator()(0,j)), incRow);
}
#endif
#ifndef __CUDA_ARCH__
HostVectorView
col(Index j)
{
return HostVectorView(numRows, &(operator()(0,j)), incRow);
}
#endif
const Index numRows, numCols, incRow, incCol;
ElementType* host_data;
ElementType* device_data;
};
} } // namespace cuda, hpc
#endif // HPC_CUDA_GEMATRIX_MAPPED_H