Possible Solution
Setup a Framework for Testing
-
Define Macros DGEMM_KC and DGEMM_NC for \(K_c\) and \(N_c\).
-
Define a Macro DGEMM_NR for \(N_r\).
-
Allocate a small matrix \(B\).
-
Allocate a buffer \(p\) for \(K_c \cdot N_c\) elements.
-
Initialize matrix \(B\).
-
Print matrix \(B\) and then buffer \(p\).
So again, we postpone the actual implementation of gepack_B!
#include <stddef.h> #include <stdio.h> #include <stdlib.h> void initGeMatrix(size_t m, size_t n, double *A, ptrdiff_t incRowA, ptrdiff_t incColA) { for (size_t i=0; i<m; ++i) { for (size_t j=0; j<n; ++j) { A[i*incRowA+j*incColA] = i*n + j + 1; } } } void printGeMatrix(size_t m, size_t n, const double *A, ptrdiff_t incRowA, ptrdiff_t incColA) { for (size_t i=0; i<m; ++i) { for (size_t j=0; j<n; ++j) { printf("%10.4lf ", A[i*incRowA+j*incColA]); } printf("\n"); } printf("\n"); } #ifndef DGEMM_KC #define DGEMM_KC 8 #endif #ifndef DGEMM_NC #define DGEMM_NC 10 #endif #ifndef DGEMM_NR #define DGEMM_NR 5 #endif int main() { size_t k = 7; size_t n = 8; double *B = malloc(k*n*sizeof(*B)); ptrdiff_t incRowB = 1; ptrdiff_t incColB = k; double *p = malloc(DGEMM_KC*DGEMM_NC*sizeof(*p)); initGeMatrix(k, n, B, incRowB, incColB); printGeMatrix(k, n, B, incRowB, incColB); printGeMatrix(1, DGEMM_KC*DGEMM_NC, p, 1, 1); free(p); free(B); }
Implement and test dgepack_B
After compiling and running the above code we start to implement dgepack_B:
#include <stddef.h> #include <stdio.h> #include <stdlib.h> void initGeMatrix(size_t m, size_t n, double *A, ptrdiff_t incRowA, ptrdiff_t incColA) { for (size_t i=0; i<m; ++i) { for (size_t j=0; j<n; ++j) { A[i*incRowA+j*incColA] = i*n + j + 1; } } } void printGeMatrix(size_t m, size_t n, const double *A, ptrdiff_t incRowA, ptrdiff_t incColA) { for (size_t i=0; i<m; ++i) { for (size_t j=0; j<n; ++j) { printf("%10.4lf ", A[i*incRowA+j*incColA]); } printf("\n"); } printf("\n"); } #ifndef DGEMM_KC #define DGEMM_KC 8 #endif #ifndef DGEMM_NC #define DGEMM_NC 10 #endif #ifndef DGEMM_NR #define DGEMM_NR 5 #endif void dgepack_B(size_t k, size_t n, const double *B, ptrdiff_t incRowB, ptrdiff_t incColB, double *p) { size_t nb = (n+DGEMM_NR-1)/DGEMM_NR; for (size_t j1=0; j1<nb; ++j1) { for (size_t j0=0; j0<DGEMM_NR; ++j0) { for (size_t l=0; l<k; ++l) { size_t j = j1*DGEMM_NR + j0; size_t nu = j1*DGEMM_NR*k + l*DGEMM_NR + j0; p[nu] = (j<n) ? B[l*incRowB + j*incColB] : 0; } } } } int main() { size_t k = 7; size_t n = 8; double *B = malloc(k*n*sizeof(*B)); ptrdiff_t incRowB = 1; ptrdiff_t incColB = k; double *p = malloc(DGEMM_KC*DGEMM_NC*sizeof(*p)); initGeMatrix(k, n, B, incRowB, incColB); printGeMatrix(k, n, B, incRowB, incColB); dgepack_B(k, n, B, incRowB, incColB, p); printGeMatrix(1, DGEMM_KC*DGEMM_NC, p, 1, 1); free(p); free(B); }