1
      2
      3
      4
      5
      6
      7
      8
      9
     10
     11
     12
     13
     14
     15
     16
     17
     18
     19
     20
     21
     22
     23
     24
     25
     26
     27
     28
     29
     30
     31
     32
     33
     34
     35
     36
     37
     38
     39
     40
     41
     42
     43
     44
     45
     46
     47
     48
     49
     50
     51
     52
     53
     54
     55
     56
     57
     58
     59
     60
     61
     62
     63
     64
     65
     66
     67
     68
     69
     70
     71
     72
     73
     74
     75
     76
     77
     78
     79
     80
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Fill an m x n matrix with the consecutive values 1, 2, 3, ...
 *
 * Elements are visited row by row, so element (i, j) receives the value
 * i*n + j + 1 regardless of how the matrix is laid out in memory.
 *
 *  m, n      number of rows and columns
 *  A         storage of the matrix; element (i, j) lives at
 *            A[i*incRowA + j*incColA]
 *  incRowA   stride between consecutive rows
 *  incColA   stride between consecutive columns
 */
void
initGeMatrix(size_t m, size_t n,
             double *A,
             ptrdiff_t incRowA, ptrdiff_t incColA)
{
    /* Running value; equals row*n + col + 1 at every assignment. */
    size_t next = 1;

    for (size_t row=0; row<m; ++row) {
        for (size_t col=0; col<n; ++col) {
            const size_t offset = row*incRowA + col*incColA;

            A[offset] = next++;
        }
    }
}

/*
 * Print an m x n matrix to stdout, one matrix row per output line.
 *
 * Every element is written with the format "%10.4lf " and the matrix is
 * followed by one empty line.
 *
 *  m, n      number of rows and columns
 *  A         storage of the matrix; element (i, j) lives at
 *            A[i*incRowA + j*incColA]
 *  incRowA   stride between consecutive rows
 *  incColA   stride between consecutive columns
 */
void
printGeMatrix(size_t m, size_t n,
              const double *A,
              ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t row=0; row<m; ++row) {
        const size_t rowOffset = row*incRowA;

        for (size_t col=0; col<n; ++col) {
            const double value = A[rowOffset + col*incColA];

            printf("%10.4lf ", value);
        }
        printf("\n");
    }
    printf("\n");
}

/*
 * Blocking parameters.  Each can be overridden at compile time
 * (e.g. -DDGEMM_NR=4).
 *
 * DGEMM_KC and DGEMM_NC are not used by dgepack_B itself; in this file
 * their product is the number of doubles reserved for the packing buffer.
 * NOTE(review): presumably the maximal row and column counts of a packed
 * block of B -- confirm against the intended blocking scheme.
 */
#ifndef DGEMM_KC
#define DGEMM_KC    8
#endif

#ifndef DGEMM_NC
#define DGEMM_NC    10
#endif

/* Number of columns of B that are stored together in one packed panel. */
#ifndef DGEMM_NR
#define DGEMM_NR    5
#endif

/*
 * Pack the k x n matrix B into the buffer p as a sequence of panels.
 *
 * The columns of B are split into ceil(n/DGEMM_NR) panels of DGEMM_NR
 * columns each.  Panel number q occupies p[q*DGEMM_NR*k] onwards and is
 * stored row by row, i.e. element (l, j0) of the panel is found at offset
 * l*DGEMM_NR + j0 within the panel.  If n is not a multiple of DGEMM_NR the
 * missing columns of the last panel are filled with zeros.
 *
 *  k, n      number of rows and columns of B
 *  B         storage of B; element (l, j) lives at
 *            B[l*incRowB + j*incColB]
 *  incRowB   stride between consecutive rows of B
 *  incColB   stride between consecutive columns of B
 *  p         destination; the function writes
 *            ceil(n/DGEMM_NR)*DGEMM_NR*k elements
 */
void
dgepack_B(size_t k, size_t n,
          const double *B, ptrdiff_t incRowB, ptrdiff_t incColB,
          double *p)
{
    const size_t numPanels = (n+DGEMM_NR-1)/DGEMM_NR;

    for (size_t panel=0; panel<numPanels; ++panel) {
        const size_t panelStart = panel*DGEMM_NR*k;

        for (size_t c=0; c<DGEMM_NR; ++c) {
            /* Column of B that maps to column c of this panel. */
            const size_t col = panel*DGEMM_NR + c;

            if (col<n) {
                for (size_t row=0; row<k; ++row) {
                    p[panelStart + row*DGEMM_NR + c]
                        = B[row*incRowB + col*incColB];
                }
            } else {
                /* Beyond the last column of B: pad with zeros. */
                for (size_t row=0; row<k; ++row) {
                    p[panelStart + row*DGEMM_NR + c] = 0;
                }
            }
        }
    }
}

int
main()
{
    size_t      k       = 7;
    size_t      n       = 8;
    double      *B      = malloc(k*n*sizeof(*B));
    ptrdiff_t   incRowB = 1;
    ptrdiff_t   incColB = k;

    double      *p      = malloc(DGEMM_KC*DGEMM_NC*sizeof(*p));

    initGeMatrix(k, n, B, incRowB, incColB);
    printGeMatrix(k, n, B, incRowB, incColB);
    dgepack_B(k, n, B, incRowB, incColB, p);
    printGeMatrix(1, DGEMM_KC*DGEMM_NC, p, 1, 1);

    free(p);
    free(B);
}