#include #include #include #define MC 384 #define KC 384 #define NC 4096 #define MR 8 #define NR 4 // // Local buffers for storing panels from A, B and C // static double _A[MC*KC] __attribute__ ((aligned (32))); static double _B[KC*NC] __attribute__ ((aligned (32))); static double _C[MR*NR] __attribute__ ((aligned (32))); // // Packing complete panels from A (i.e. without padding) // static void pack_MRxk(int k, const double *A, int incRowA, int incColA, double *buffer) { int i, j; for (j=0; j0) { for (j=0; j0) { for (i=0; i