#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>
void
initGeMatrix(int m, int n, double *A, int incRowA, int incColA)
{
int i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
A[i*incRowA+j*incColA] = i*n + j + 1;
}
}
}
void
printGeMatrix(int m, int n, const double *A, int incRowA, int incColA)
{
int i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
printf("%10.4lf ", A[i*incRowA+j*incColA]);
}
printf("\n");
}
printf("\n\n");
}
#ifndef DGEMM_MC
#define DGEMM_MC 8
#endif
#ifndef DGEMM_NC
#define DGEMM_NC 9
#endif
#ifndef DGEMM_KC
#define DGEMM_KC 10
#endif
#ifndef DGEMM_MR
#define DGEMM_MR 4
#endif
#ifndef DGEMM_NR
#define DGEMM_NR 3
#endif
void
dpack_A(int m, int k, const double *A, int incRowA, int incColA, double *p)
{
int i, i0, j, l, nu;
int mp = (m+DGEMM_MR-1) / DGEMM_MR;
printf("m=%d, k=%d, mp=%d\n", m, k, mp);
for (j=0; j<k; ++j) {
for (l=0; l<mp; ++l) {
for (i0=0; i0<DGEMM_MR; ++i0) {
i = l*DGEMM_MR + i0;
nu = l*DGEMM_MR*k + j*DGEMM_MR + i0;
p[nu] = (i<m) ? A[i*incRowA+j*incColA]
: 0;
}
}
}
}
#ifndef DIM_M
#define DIM_M 10
#endif
#ifndef DIM_K
#define DIM_K 12
#endif
#ifndef ROWMAJOR
#define INCROW_A 1
#define INCCOL_A DIM_M
#else
#define INCROW_A DIM_N
#define INCCOL_A 1
#endif
double A[DIM_M*DIM_K];
double A_buffer[DGEMM_MC*DGEMM_KC];
int
main()
{
initGeMatrix(DIM_M, DIM_K, A, INCROW_A, INCCOL_A);
printf("A=\n");
printGeMatrix(DIM_M, DIM_K, A, INCROW_A, INCCOL_A);
printf("Nach packen von Block links oben\n");
dpack_A(DGEMM_MC, DGEMM_KC, A, INCROW_A, INCCOL_A, A_buffer);
printf("A_buffer=\n");
printGeMatrix(1, DGEMM_MC*DGEMM_KC, A_buffer, 0, 1);
printf("Nach packen von Block rechts oben\n");
printf("A_buffer=\n");
printGeMatrix(1, DGEMM_MC*DGEMM_KC, A_buffer, 0, 1);
printf("Nach packen von Block links unten\n");
printf("A_buffer=\n");
printGeMatrix(1, DGEMM_MC*DGEMM_KC, A_buffer, 0, 1);
printf("Nach packen von Block rechts unten\n");
printf("A_buffer=\n");
printGeMatrix(1, DGEMM_MC*DGEMM_KC, A_buffer, 0, 1);
return 0;
}