#include <stdio.h>
#include <math.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>
void
initGeMatrix(int m, int n, double *A, int incRowA, int incColA)
{
int i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
A[i*incRowA+j*incColA] = i*n + j + 1;
}
}
}
void
printGeMatrix(int m, int n, const double *A, int incRowA, int incColA)
{
int i, j;
for (i=0; i<m; ++i) {
for (j=0; j<n; ++j) {
printf("%10.4lf ", A[i*incRowA+j*incColA]);
}
printf("\n");
}
printf("\n\n");
}
#ifndef DGEMM_MC
#define DGEMM_MC 8
#endif
#ifndef DGEMM_NC
#define DGEMM_NC 9
#endif
#ifndef DGEMM_KC
#define DGEMM_KC 10
#endif
#ifndef DGEMM_MR
#define DGEMM_MR 4
#endif
#ifndef DGEMM_NR
#define DGEMM_NR 3
#endif
void
dpack_A(int m, int k, const double *A, int incRowA, int incColA, double *p)
{
int i, i0, j, l, nu;
int mp = (m+DGEMM_MR-1) / DGEMM_MR;
for (j=0; j<k; ++j) {
for (l=0; l<mp; ++l) {
for (i0=0; i0<DGEMM_MR; ++i0) {
i = l*DGEMM_MR + i0;
nu = l*DGEMM_MR*k + j*DGEMM_MR + i0;
p[nu] = (i<m) ? A[i*incRowA+j*incColA]
: 0;
}
}
}
}
void
dpack_B(int k, int n, const double *B, int incRowB, int incColB, double *p)
{
int i, j, j0, l, nu;
int np = (n+DGEMM_NR-1) / DGEMM_NR;
for (l=0; l<np; ++l) {
for (j0=0; j0<DGEMM_NR; ++j0) {
j = l*DGEMM_NR + j0;
for (i=0; i<k; ++i) {
nu = l*DGEMM_NR*k + i*DGEMM_NR + j0;
p[nu] = (j<n) ? B[i*incRowB+j*incColB]
: 0;
}
}
}
}
#ifndef DIM_M
#define DIM_M 10
#endif
#ifndef DIM_K
#define DIM_K 12
#endif
#ifndef DIM_N
#define DIM_N 10
#endif
#ifndef ROWMAJOR
#define INCROW_A 1
#define INCCOL_A DIM_M
#else
#define INCROW_A DIM_K
#define INCCOL_A 1
#endif
#ifndef ROWMAJOR
#define INCROW_B 1
#define INCCOL_B DIM_K
#else
#define INCROW_B DIM_N
#define INCCOL_B 1
#endif
#define MIN(X,Y) (X) < (Y) ? (X) : (Y)
double B[DIM_K*DIM_N];
double B_buffer[DGEMM_KC*DGEMM_NC];
int
main()
{
initGeMatrix(DIM_K, DIM_N, B, INCROW_B, INCCOL_B);
printf("B=\n");
printGeMatrix(DIM_K, DIM_N, B, INCROW_B, INCCOL_B);
printf("Nach packen von Block links oben\n");
dpack_B(MIN(DIM_K, DGEMM_KC), MIN(DIM_N, DGEMM_NC),
B, INCROW_B, INCCOL_B, B_buffer);
printf("B_buffer=\n");
printGeMatrix(1, DGEMM_KC*DGEMM_NC, B_buffer, 0, 1);
printGeMatrix(DGEMM_KC*DGEMM_NC/DGEMM_NR, DGEMM_NR, B_buffer, DGEMM_NR, 1);
printf("Nach packen von Block rechts oben\n");
dpack_B(MIN(DIM_K, DGEMM_KC), MIN(DIM_N-DGEMM_NC, DGEMM_NC),
&B[DGEMM_NC*INCCOL_B], INCROW_B, INCCOL_B, B_buffer);
printf("B_buffer=\n");
printGeMatrix(1, DGEMM_KC*DGEMM_NC, B_buffer, 0, 1);
printGeMatrix(DGEMM_KC*DGEMM_NC/DGEMM_NR, DGEMM_NR, B_buffer, DGEMM_NR, 1);
printf("Nach packen von Block links unten\n");
dpack_B(MIN(DIM_K-DGEMM_KC, DGEMM_KC), MIN(DIM_N, DGEMM_NC),
&B[DGEMM_KC*INCROW_B],
INCROW_B, INCCOL_B, B_buffer);
printf("B_buffer=\n");
printGeMatrix(1, DGEMM_KC*DGEMM_NC, B_buffer, 0, 1);
printGeMatrix(DGEMM_KC*DGEMM_NC/DGEMM_NR, DGEMM_NR, B_buffer, DGEMM_NR, 1);
printf("Nach packen von Block rechts unten\n");
dpack_B(MIN(DIM_K-DGEMM_KC, DGEMM_KC), MIN(DIM_N-DGEMM_NC, DGEMM_NC),
&B[DGEMM_KC*INCROW_B+DGEMM_NC*INCCOL_B],
INCROW_B, INCCOL_B, B_buffer);
printf("B_buffer=\n");
printGeMatrix(1, DGEMM_KC*DGEMM_NC, B_buffer, 0, 1);
printGeMatrix(DGEMM_KC*DGEMM_NC/DGEMM_NR, DGEMM_NR, B_buffer, DGEMM_NR, 1);
return 0;
}