#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Fill the m x n matrix A with the values 1, 2, ..., m*n, numbered row by
 * row: element (i, j) receives i*n + j + 1.
 *
 * m, n     number of rows / columns
 * A        pointer to element (0, 0)
 * incRowA  distance (in elements) between vertically adjacent entries
 * incColA  distance (in elements) between horizontally adjacent entries
 */
void initGeMatrix(size_t m, size_t n, double *A, ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t i = 0; i < m; ++i) {
        for (size_t j = 0; j < n; ++j) {
            /* Index in signed arithmetic so that negative strides do not
               rely on unsigned wrap-around. */
            ptrdiff_t offset = (ptrdiff_t)i * incRowA + (ptrdiff_t)j * incColA;

            A[offset] = (double)(i * n + j + 1);
        }
    }
}

/*
 * Print the m x n matrix A to stdout, one matrix row per output line,
 * followed by an empty line.
 *
 * m, n     number of rows / columns
 * A        pointer to element (0, 0)
 * incRowA  distance (in elements) between vertically adjacent entries
 * incColA  distance (in elements) between horizontally adjacent entries
 */
void printGeMatrix(size_t m, size_t n, const double *A, ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t i = 0; i < m; ++i) {
        for (size_t j = 0; j < n; ++j) {
            ptrdiff_t offset = (ptrdiff_t)i * incRowA + (ptrdiff_t)j * incColA;

            printf("%10.4lf ", A[offset]);
        }
        printf("\n");
    }
    printf("\n");
}

/*
 * Blocking parameters. Each one can be overridden at compile time
 * (e.g. -DDGEMM_MC=16). main() sizes the packing buffer as
 * DGEMM_MC * DGEMM_KC elements; dgepack_A() packs rows in groups of DGEMM_MR.
 */
#ifndef DGEMM_MC
#define DGEMM_MC 8
#endif

#ifndef DGEMM_KC
#define DGEMM_KC 9
#endif

#ifndef DGEMM_MR
#define DGEMM_MR 4
#endif

/*
 * Pack the m x k matrix A into the contiguous buffer p.
 *
 * A is cut into horizontal panels of DGEMM_MR rows each. The panels are
 * stored one after another; inside a panel the entries are stored column by
 * column, i.e. the DGEMM_MR entries of column 0 come first, then those of
 * column 1, and so on. If m is not a multiple of DGEMM_MR, the missing rows
 * of the last panel are filled with zeros.
 *
 * m, k     number of rows / columns of A
 * A        pointer to element (0, 0)
 * incRowA  distance (in elements) between vertically adjacent entries
 * incColA  distance (in elements) between horizontally adjacent entries
 * p        destination; exactly ceil(m / DGEMM_MR) * DGEMM_MR * k elements
 *          are written, so the caller must provide at least that many
 */
void dgepack_A(size_t m, size_t k, const double *A, ptrdiff_t incRowA, ptrdiff_t incColA, double *p)
{
    size_t mb = (m + DGEMM_MR - 1) / DGEMM_MR;   /* number of row panels */
    size_t nu = 0;                               /* next write position in p */

    /* Iterating panel -> column -> row-in-panel visits p strictly
       sequentially: nu == i1*DGEMM_MR*k + l*DGEMM_MR + i0. */
    for (size_t i1 = 0; i1 < mb; ++i1) {
        for (size_t l = 0; l < k; ++l) {
            for (size_t i0 = 0; i0 < DGEMM_MR; ++i0) {
                size_t i = i1 * DGEMM_MR + i0;   /* row index in A */

                if (i < m) {
                    p[nu] = A[(ptrdiff_t)i * incRowA + (ptrdiff_t)l * incColA];
                } else {
                    p[nu] = 0.0;                 /* zero padding */
                }
                ++nu;
            }
        }
    }
}

/*
 * Demo: build a 6 x 7 matrix stored column by column, print it, pack it
 * with dgepack_A() and print the complete packing buffer as one long row.
 */
int main(void)
{
    size_t m = 6;
    size_t k = 7;

    /* Row stride 1, column stride m: columns are contiguous. */
    ptrdiff_t incRowA = 1;
    ptrdiff_t incColA = (ptrdiff_t)m;

    /* dgepack_A() writes whole panels, so round m up to a multiple of
       DGEMM_MR when checking that the result fits into the buffer. */
    size_t packedLen = ((m + DGEMM_MR - 1) / DGEMM_MR) * DGEMM_MR * k;
    size_t bufferLen = (size_t)(DGEMM_MC) * (size_t)(DGEMM_KC);

    if (packedLen > bufferLen) {
        fprintf(stderr,
                "packed matrix needs %zu elements but the buffer holds only %zu\n",
                packedLen, bufferLen);
        return EXIT_FAILURE;
    }

    double *A = malloc(m * k * sizeof(*A));

    /* calloc: the whole buffer is printed below, but dgepack_A() fills only
       the first packedLen elements. Zero-initialising avoids reading
       indeterminate values for the remainder. */
    double *p = calloc(bufferLen, sizeof(*p));

    if (A == NULL || p == NULL) {
        fprintf(stderr, "out of memory\n");
        free(p);
        free(A);
        return EXIT_FAILURE;
    }

    initGeMatrix(m, k, A, incRowA, incColA);
    printGeMatrix(m, k, A, incRowA, incColA);

    dgepack_A(m, k, A, incRowA, incColA, p);
    printGeMatrix(1, bufferLen, p, 1, 1);

    free(p);
    free(A);
    return EXIT_SUCCESS;
}