#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>
void
initGeMatrix(int m, int n, double *A, int incRowA, int incColA)
{
int i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
A[i*incRowA+j*incColA] = i*n + j + 1;
}
}
}
void
printGeMatrix(int m, int n, const double *A, int incRowA, int incColA)
{
int i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
printf("%10.4lf ", A[i*incRowA+j*incColA]);
}
printf("\n");
}
printf("\n\n");
}
void
dgecopy(int m, int n,
const double *X, int incRowX, int incColX,
double *Y, int incRowY, int incColY)
{
int i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
Y[i*incRowY+j*incColY] = X[i*incRowX+j*incColX];
}
}
}
void
dgescal(int m, int n, double alpha,
double *X, int incRowX, int incColX)
{
int i, j;
for (j=0; j<n; ++j) {
for (i=0; i<m; ++i) {
X[i*incRowX+j*incColX] *= alpha;
}
}
}
void
dgeaxpy(int m, int n, double alpha,
const double *X, int incRowX, int incColX,
double *Y, int incRowY, int incColY)
{
int i, j;
if (alpha==0 || m==0 || n==0) {
return;
}
for (j=0; j<n; ++j) {
for (i=0; i<m; ++i) {
Y[i*incRowY+j*incColY] += alpha*X[i*incRowX+j*incColX];
}
}
}
void
dgemm_ilj(int m, int n, int k,
double alpha,
const double *A, int incRowA, int incColA,
const double *B, int incRowB, int incColB,
double beta,
double *C, int incRowC, int incColC)
{
int i, j, l;
for (i=0; i<m; ++i) {
for (l=0; l<k; ++l) {
for (j=0; j<n; ++j) {
if (l==0) {
C[i*incRowC+j*incColC] *= beta;
}
C[i*incRowC+j*incColC] += alpha*A[i*incRowA+l*incColA]
*B[l*incRowB+j*incColB];
}
}
}
}
#ifndef DGEMM_MC
#define DGEMM_MC 8
#endif
#ifndef DGEMM_NC
#define DGEMM_NC 9
#endif
#ifndef DGEMM_KC
#define DGEMM_KC 10
#endif
#ifndef DGEMM_MR
#define DGEMM_MR 4
#endif
#ifndef DGEMM_NR
#define DGEMM_NR 3
#endif
void
dpack_A(int m, int k, const double *A, int incRowA, int incColA, double *p)
{
int i, i0, j, l, nu;
int mp = (m+DGEMM_MR-1) / DGEMM_MR;
for (j=0; j<k; ++j) {
for (l=0; l<mp; ++l) {
for (i0=0; i0<DGEMM_MR; ++i0) {
i = l*DGEMM_MR + i0;
nu = l*DGEMM_MR*k + j*DGEMM_MR + i0;
p[nu] = (i<m) ? A[i*incRowA+j*incColA]
: 0;
}
}
}
}
void
dpack_B(int k, int n, const double *B, int incRowB, int incColB, double *p)
{
int i, j, j0, l, nu;
int np = (n+DGEMM_NR-1) / DGEMM_NR;
for (l=0; l<np; ++l) {
for (j0=0; j0<DGEMM_NR; ++j0) {
j = l*DGEMM_NR + j0;
for (i=0; i<k; ++i) {
nu = l*DGEMM_NR*k + i*DGEMM_NR + j0;
p[nu] = (j<n) ? B[i*incRowB+j*incColB]
: 0;
}
}
}
}
void
dgemm_micro(int k,
double alpha,
const double *A,
const double *B,
double beta,
double *C, int incRowC, int incColC)
{
int i, j, l;
double AB[DGEMM_MR*DGEMM_NR];
for (l=0; l<DGEMM_MR*DGEMM_NR; ++l) {
AB[l] = 0;
}
for (l=0; l<k; ++l) {
for (j=0; j<DGEMM_NR; ++j) {
for (i=0; i<DGEMM_MR; ++i) {
AB[i+j*DGEMM_MR] += A[i+l*DGEMM_MR]*B[j+l*DGEMM_NR];
}
}
}
if (beta==0) {
for (j=0; j<DGEMM_NR; ++j) {
for (i=0; i<DGEMM_MR; ++i) {
C[i*incRowC+j*incColC] = alpha*AB[i+j*DGEMM_MR];
}
}
} else {
for (j=0; j<DGEMM_NR; ++j) {
for (i=0; i<DGEMM_MR; ++i) {
C[i*incRowC+j*incColC] = beta*C[i*incRowC+j*incColC]
+ alpha*AB[i+j*DGEMM_MR];
}
}
}
}
#ifndef DIM_K
#define DIM_K 12
#endif
#ifndef ALPHA
#define ALPHA 2
#endif
#ifndef BETA
#define BETA 2
#endif
#ifndef ROWMAJOR
#define INCROW_C 1
#define INCCOL_C DGEMM_MR
#else
#define INCROW_C DGEMM_NR
#define INCCOL_C 1
#endif
double A_panel[DIM_K*DGEMM_MR];
double B_panel[DIM_K*DGEMM_NR];
double C[DGEMM_MR*DGEMM_NR];
double C_ref[DGEMM_MR*DGEMM_NR];
int
main()
{
initGeMatrix(DGEMM_MR, DIM_K, A_panel, 1, DGEMM_MR);
initGeMatrix(DIM_K, DGEMM_NR, B_panel, DGEMM_NR, 1);
initGeMatrix(DGEMM_MR, DGEMM_NR, C, INCROW_C, INCCOL_C);
dgecopy(DGEMM_MR, DGEMM_NR,
C, INCROW_C, INCCOL_C,
C_ref, INCROW_C, INCCOL_C);
printf("A_panel =\n");
printGeMatrix(DGEMM_MR, DIM_K, A_panel, 1, DGEMM_MR);
printf("B_panel =\n");
printGeMatrix(DIM_K, DGEMM_NR, B_panel, DGEMM_NR, 1);
printf("C =\n");
printGeMatrix(DGEMM_MR, DGEMM_NR, C, INCROW_C, INCCOL_C);
dgemm_micro(DIM_K,
(double)ALPHA,
A_panel, B_panel,
(double)BETA,
C, INCROW_C, INCCOL_C);
printf("%lf*C + %lf*A*B =\n", (double)ALPHA, (double)BETA);
printGeMatrix(DGEMM_MR, DGEMM_NR, C, INCROW_C, INCCOL_C);
printf("C_ref =\n");
dgemm_ilj(DGEMM_MR, DGEMM_NR, DIM_K,
(double)ALPHA,
A_panel, 1, DGEMM_MR,
B_panel, DGEMM_NR, 1,
(double)BETA,
C_ref, INCROW_C, INCCOL_C);
printGeMatrix(DGEMM_MR, DGEMM_NR, C_ref, INCROW_C, INCCOL_C);
return 0;
}