#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
#ifndef DGEMM_MC
#define DGEMM_MC 4
#endif
#ifndef DGEMM_KC
#define DGEMM_KC 5
#endif
#ifndef DGEMM_MR
#define DGEMM_MR 2
#endif
#if (DGEMM_MC % DGEMM_MR != 0)
#error "DGEMM_MC must be a multiple of DEGMM_MR."
#endif
void
dpack_A(size_t mc, size_t kc,
const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
double *A_)
{
size_t mb = mc / DGEMM_MR;
for (size_t ib=0; ib<mb; ++ib) {
for (size_t j=0; j<kc; ++j) {
for (size_t i=0; i<DGEMM_MR; ++i) {
A_[ib*DGEMM_MR*kc + j*DGEMM_MR + i]
= A[(ib*DGEMM_MR+i)*incRowA + j*incColA];
}
}
}
if (mb*DGEMM_MR<mc) {
size_t mr = mc % DGEMM_MR;
for (size_t j=0; j<kc; ++j) {
for (size_t i=0; i<mr; ++i) {
A_[mb*DGEMM_MR*kc + j*DGEMM_MR + i]
= A[(mb*DGEMM_MR+i)*incRowA + j*incColA];
}
for (size_t i=mr; i<DGEMM_MR; ++i) {
A_[mb*DGEMM_MR*kc + j*DGEMM_MR + i] = 0;
}
}
}
}
void
initDGeMatrix(size_t m, size_t n,
double *A,
ptrdiff_t incRowA, ptrdiff_t incColA)
{
for (size_t i=0; i<m; ++i) {
for (size_t j=0; j<n; ++j) {
A[i*incRowA + j*incColA] = i*n + j + 1;
}
}
}
void
printDGeMatrix(size_t m, size_t n,
const double *A,
ptrdiff_t incRowA, ptrdiff_t incColA)
{
for (size_t i=0; i<m; ++i) {
for (size_t j=0; j<n; ++j) {
printf("%9.2lf ", A[i*incRowA + j*incColA]);
}
printf("\n");
}
printf("\n");
}
void
test_pack_A(size_t m, size_t k,
double *A,
ptrdiff_t incRowA, ptrdiff_t incColA)
{
double *A_ = malloc(DGEMM_MC*DGEMM_KC*sizeof(double));
if (! A_) {
abort();
}
size_t mb = (m+DGEMM_MC-1) / DGEMM_MC;
size_t kb = (k+DGEMM_KC-1) / DGEMM_KC;
size_t mc_ = m % DGEMM_MC;
size_t kc_ = k % DGEMM_KC;
printf("DGEMM_MC = %d\n", DGEMM_MC);
printf("DGEMM_KC = %d\n", DGEMM_KC);
printf("DGEMM_MR = %d\n", DGEMM_MR);
for (size_t ib=0; ib<mb; ++ib) {
size_t mc = ib<mb-1 || mc_==0 ? DGEMM_MC
: mc_;
for (size_t lb=0; lb<kb; ++lb) {
size_t kc = lb<kb-1 || kc_==0 ? DGEMM_KC
: kc_;
printf("packing %zu x %zu block from A(%zu, %zu)\n",
mc, kc, ib*DGEMM_MC, lb*DGEMM_KC);
dpack_A(mc, kc,
&A[ib*DGEMM_MC*incRowA + lb*DGEMM_KC*incColA],
incRowA, incColA,
A_);
printf("Panels in buffer A_:\n");
printDGeMatrix(DGEMM_MR, DGEMM_MC*DGEMM_KC/DGEMM_MR,
A_, 1, DGEMM_MR);
printf("A_ as array:\n");
printDGeMatrix(1, DGEMM_MC*DGEMM_KC, A_, 0, 1);
}
}
free(A_);
}
#ifndef COLMAJOR
#define COLMAJOR 1
#endif
int
main()
{
size_t m = 7;
size_t k = 8;
ptrdiff_t incRowA = COLMAJOR ? 1 : k;
ptrdiff_t incColA = COLMAJOR ? m : 1;
double *A = malloc(m*k*sizeof(double));
if (!A) {
abort();
}
initDGeMatrix(m, k, A, incRowA, incColA);
printf("m = %zu\n", m);
printf("k = %zu\n", k);
printf("A =\n");
printDGeMatrix(m, k, A, incRowA, incColA);
test_pack_A(m, k, A, incRowA, incColA);
free(A);
}