#include <cassert>
#include <chrono>
#include <cmath>
#include <limits>
#include <random>
#include <type_traits>
#include <boost/timer.hpp>
#include <boost/numeric/ublas/matrix.hpp>
#include <boost/numeric/ublas/io.hpp>
#include <boost/numeric/ublas/lu.hpp>
#include <boost/numeric/ublas/operation.hpp>
#include "gemm.hpp"
#include "lu.hpp"
template <typename T>
struct WallTime
{
void
tic()
{
t0 = std::chrono::high_resolution_clock::now();
}
T
toc()
{
using namespace std::chrono;
elapsed = high_resolution_clock::now() - t0;
return duration<T,seconds::period>(elapsed).count();
}
std::chrono::high_resolution_clock::time_point t0;
std::chrono::high_resolution_clock::duration elapsed;
};
//
// For checking I don't care for efficiency of elegance ...
//
template <typename MatrixA1, typename MatrixA2, typename MatrixP>
double
checkLU(const MatrixA1 &A1, MatrixA2 &A2, const MatrixP &P)
{
namespace ublas = boost::numeric::ublas;
typedef typename MatrixA1::size_type size_type;
typedef typename MatrixA1::value_type value_type;
size_type m = A1.size1();
size_type n = A1.size2();
ublas::matrix<value_type> L(m,m);
// Copy L form A2 and set A2 to U
for (size_type j=0; j<m; ++j) {
for (size_type i=0; i<m; ++i) {
if (i==j) {
L(i,i) = 1;
} else if (i>j) {
L(i,j) = A2(i,j);
A2(i,j) = 0;
} else if (j>i) {
L(i,j) = 0;
}
}
}
ublas::matrix<value_type> LU(m,n);
ublas::axpy_prod(L, A2, LU, true);
// LU <- inv(P)*L*U
foo::swap_rows(P, LU, true);
double diff = 0;
for (size_type j=0; j<n; ++j) {
for (size_type i=0; i<m; ++i) {
diff += std::abs(A1(i,j)-LU(i,j));
}
}
return diff;
}
// fill rectangular matrix with random values
template <typename MATRIX>
void
fill(MATRIX &A)
{
typedef typename MATRIX::size_type size_type;
typedef typename MATRIX::value_type T;
std::random_device random;
std::default_random_engine mt(random());
std::uniform_real_distribution<T> uniform(-100,100);
for (size_type i=0; i<A.size1(); ++i) {
for (size_type j=0; j<A.size2(); ++j) {
A(i,j) = uniform(mt);
}
}
}
#ifndef M_MAX
#define M_MAX 4000
#endif
#ifndef N_MAX
#define N_MAX 4000
#endif
int
main()
{
namespace ublas = boost::numeric::ublas;
const std::size_t m_min = 50;
const std::size_t n_min = 50;
const std::size_t m_max = M_MAX;
const std::size_t n_max = N_MAX;
const std::size_t m_inc = 50;
const std::size_t n_inc = 50;
typedef double T;
typedef ublas::row_major SO;
WallTime<double> walltime;
for (std::size_t m=m_min, n=n_min;
m<=m_max && n<=n_max;
m+=m_inc, n+=n_inc)
{
ublas::matrix<T,SO> A_org(m, n);
ublas::matrix<T,SO> A1(m, n);
ublas::matrix<T,SO> A2(m, n);
ublas::matrix<T,SO> A3(m, n);
ublas::permutation_matrix<T> P1(m), P2(m), P3(m);
fill(A_org);
A1 = A_org;
A2 = A_org;
A3 = A_org;
double diff, t, mflops;
std::size_t info;
std::size_t minMN = std::min(m,n);
std::size_t maxMN = std::max(m,n);
mflops = minMN*minMN*maxMN -(minMN*minMN*minMN/3.) -(minMN*minMN/2.);
mflops /= 1000000;
walltime.tic();
info = ublas::lu_factorize(A1, P1);
t = walltime.toc();
diff = checkLU(A_org, A1, P1);
std::printf("%4ld %4ld %4ld %16.3e %16.5lf %16.5lf",
m, n, info, diff, t, mflops/t);
walltime.tic();
info = foo::lu_blocked(A2, P2);
t = walltime.toc();
diff = checkLU(A_org, A2, P2);
std::printf("%4ld %16.3e %16.5lf %16.5lf",
info, diff, t, mflops/t);
walltime.tic();
info = foo::lu_blocked_recursive(A3, P3);
t = walltime.toc();
diff = checkLU(A_org, A3, P3);
std::printf("%4ld %16.3e %16.5lf %16.5lf\n",
info, diff, t, mflops/t);
}
}