#include #include #include #include #include #include #include void initMatrix(size_t m, size_t n, double *A, ptrdiff_t incRow, ptrdiff_t incCol) { for (size_t i=0; i fabs(x[iMax*incX])) { iMax = i; } } return iMax; } void dcopy(size_t n, const double *x, ptrdiff_t incX, double *y, ptrdiff_t incY) { for (size_t i=0; ij) ? LU[i*incRowLU+j*incColLU] : (i==j) ? 1 : 0; } } for (j=0; j0; --i) { if (i-1 != P[(i-1)*incP]) { dswap(n, &LU[(i-1)*incRowLU], incColLU, &LU[P[(i-1)*incP]*incRowLU], incColLU); } } for (j=0; j(Y) ? (X) : (Y)) double dgenrm1(size_t m, size_t n, const double *A, ptrdiff_t incRowA, ptrdiff_t incColA) { double result = 0; for (size_t j=0; jresult) { result = sum; } } return result; } double err_dgemm(size_t m, size_t n, size_t k, double alpha, const double *A, ptrdiff_t incRowA, ptrdiff_t incColA, const double *B, ptrdiff_t incRowB, ptrdiff_t incColB, double beta, const double *C0, ptrdiff_t incRowC0, ptrdiff_t incColC0, double *C, ptrdiff_t incRowC, ptrdiff_t incColC) { double normA = dgenrm1(m, k, A, incRowA, incColA); double normB = dgenrm1(k, n, B, incRowB, incColB); double normC = dgenrm1(m, n, C, incRowC0, incColC0); double normD; size_t mn = (m>n) ? m : n; size_t mnk = (mn>k) ? mn : k; normA = MAX(normA, fabs(alpha)*normA); normC = MAX(normC, fabs(beta)*normC); dgeaxpy(m, n, -1.0, C0, incRowC0, incColC0, C, incRowC, incColC); normD = dgenrm1(m, n, C, incRowC, incColC); return normD/(mnk*normA*normB*normC); } //------------------------------------------------------------------------------ #ifndef MIN_N #define MIN_N 100 #endif #ifndef MAX_N #define MAX_N 2000 #endif #ifndef INC_N #define INC_N 100 #endif #ifndef MIN_M #define MIN_M 100 #endif #ifndef MAX_M #define MAX_M 2000 #endif #ifndef INC_M #define INC_M 100 #endif #ifndef MIN_K #define MIN_K 100 #endif #ifndef MAX_K #define MAX_K 2000 #endif #ifndef INC_K #define INC_K 100 #endif #ifndef ALPHA #define ALPHA 1 #endif #ifndef BETA #define BETA 1 #endif #ifndef ROWMAJOR_A #define ROWMAJOR_A 0 #endif #ifndef ROWMAJOR_B #define ROWMAJOR_B 0 #endif #ifndef ROWMAJOR_C #define ROWMAJOR_C 0 #endif double A_[MAX_M*MAX_K]; double B_[MAX_K*MAX_N]; double C_[MAX_M*MAX_N]; double C0[MAX_M*MAX_N]; // reference solution double C1[MAX_M*MAX_N]; // tested solution int main() { randGeMatrix(MAX_M, MAX_K, A_, 1, MAX_M); randGeMatrix(MAX_K, MAX_N, B_, 1, MAX_K); randGeMatrix(MAX_N, MAX_M, C_, 1, MAX_M); printf("#%9s %9s %9s", "m", "n", "k"); printf(" %12s %12s %17s", "t", "MFLOPS", "Residual Error"); printf(" %12s %12s %17s", "t", "MFLOPS", "Residual Error"); printf(" %12s %12s %17s", "t", "MFLOPS", "Residual Error"); printf(" %12s %12s %17s", "t", "MFLOPS", "Residual Error"); printf(" %12s %12s %17s", "t", "MFLOPS", "Residual Error"); printf(" %12s %12s %17s", "t", "MFLOPS", "Residual Error"); printf("\n"); for (size_t m=MIN_M, n=MIN_N, k=MIN_K; n<=MAX_N && m<=MAX_M && k<=MAX_K; m+=INC_M, n+=INC_N, k+=INC_K) { double t, dt, err; size_t runs = 1; double ops = 2.0*m/1000*n/1000*k; ptrdiff_t incRowA = (ROWMAJOR_A==1) ? k : 1; ptrdiff_t incColA = (ROWMAJOR_A==1) ? 1 : m; ptrdiff_t incRowB = (ROWMAJOR_B==1) ? n : 1; ptrdiff_t incColB = (ROWMAJOR_B==1) ? 1 : k; ptrdiff_t incRowC = (ROWMAJOR_C==1) ? n : 1; ptrdiff_t incColC = (ROWMAJOR_C==1) ? 1 : m; printf(" %9zu %9zu %9td", m, n, k); // compute reference solution dgecopy(m, n, C_, 1, MAX_M, C0, incRowC, incColC); dgemm_ref(m, n, k, ALPHA, A_, incRowA, incRowA, B_, incRowB, incColB, BETA, C0, incRowC, incColC); // benchmark dgemm_jil t = 0; runs = 0; do { dgecopy(m, n, C_, 1, MAX_M, C1, incRowC, incColC); dt = walltime(); dgemm_jil(m, n, k, ALPHA, A_, incRowA, incRowA, B_, incRowB, incColB, BETA, C1, incRowC, incColC); dt = walltime() - dt; t += dt; ++runs; } while (t<0.3); t /= runs; err = err_dgemm(m, n, k, ALPHA, A_, incRowA, incColA, B_, incRowB, incColB, BETA, C0, incRowC, incColC, C1, incRowC, incColC); printf(" %12.2e %12.2lf %12.2e %4s", t, ops/t, err, (err