Possible Solution and another Exercise

Content

Source Code

#include <stdlib.h>         // for malloc(), free(), rand(), srand(), abort()
#include <stdio.h>          // for printf()
#include <stddef.h>         // for size_t, ptrdiff_t
#include <math.h>           // for nan(), fabs()
#include <float.h>          // for DBL_EPSILON
#include <stdbool.h>        // for typedef bool
#include <sys/times.h>      // needed for walltime()
#include <unistd.h>         // needed for walltime()

//-- Function for benchmarking and testing -------------------------------------

// Returns the value reported by times() converted to seconds.
// Only differences between two calls are meaningful.
double
walltime()
{
    // Length of one clock tick in seconds; looked up on the first call only.
    static double secPerTick = 0.0;
    struct tms    cpuTimes;

    if (secPerTick == 0.0) {
        secPerTick = 1.0 / (double) sysconf(_SC_CLK_TCK);
    }

    clock_t ticks = times(&cpuTimes);
    return (double) ticks * secPerTick;
}


// Fills the m x n matrix A with the values 1, 2, 3, ... numbered row by row,
// i.e. A(i,j) = i*n + j + 1. Element (i,j) is stored at
// A[i*incRowA + j*incColA].
void
initGeMatrix(size_t m, size_t n,
             double *A,
             ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t row=0; row<m; ++row) {
        size_t col = 0;

        while (col<n) {
            size_t pos = row*incRowA + col*incColA;

            A[pos] = row*n + col + 1;
            ++col;
        }
    }
}

// Fills the m x n matrix A either with NaN (withNan == true) or with
// pseudo random values derived from rand() that lie in about [-1, 1].
// Elements are visited row by row; rand() is only called when withNan is
// false. Element (i,j) is stored at A[i*incRowA + j*incColA].
void
randGeMatrix(size_t m, size_t n, bool withNan,
             double *A,
             ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t row=0; row<m; ++row) {
        for (size_t col=0; col<n; ++col) {
            double value;

            if (withNan) {
                value = nan("");
            } else {
                value = 2.*(rand()-RAND_MAX/2)/RAND_MAX;
            }
            A[row*incRowA + col*incColA] = value;
        }
    }
}

// Prints the m x n matrix A to stdout, one matrix row per line, followed by
// an empty line. Element (i,j) is read from A[i*incRowA + j*incColA].
void
printGeMatrix(size_t m, size_t n,
              const double *A,
              ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t row=0; row<m; ++row) {
        size_t col = 0;

        while (col<n) {
            const double entry = A[row*incRowA + col*incColA];

            printf("%9.2lf ", entry);
            ++col;
        }
        printf("\n");
    }
    printf("\n");
}

// Infinity norm of the m x n matrix A: the largest sum of absolute values
// taken over one row. Returns 0 for an empty matrix. Element (i,j) is read
// from A[i*incRowA + j*incColA].
double
dgenrm_inf(size_t m, size_t n,
           const double *A,
           ptrdiff_t incRowA, ptrdiff_t incColA)
{
    double maxRowSum = 0;

    for (size_t row=0; row<m; ++row) {
        double rowSum = 0;
        size_t col    = 0;

        while (col<n) {
            rowSum += fabs(A[row*incRowA+col*incColA]);
            ++col;
        }

        // A NaN row sum compares false and therefore never replaces the maximum.
        maxRowSum = (rowSum>maxRowSum) ? rowSum : maxRowSum;
    }

    return maxRowSum;
}

//-- BLAS Level 1 functions ----------------------------------------------------

// y <- x for vectors of length n. Element i of x is x[i*incX], element i of
// y is y[i*incY].
void
dcopy(size_t n,
      const double *x, ptrdiff_t incX,
      double *y, ptrdiff_t incY)
{
    size_t k = 0;

    while (k<n) {
        y[k*incY] = x[k*incX];
        ++k;
    }
}

// y <- y + alpha*x for vectors of length n. Element i of x is x[i*incX],
// element i of y is y[i*incY].
void
daxpy(size_t n, double alpha,
      const double *x, ptrdiff_t incX,
      double *y, ptrdiff_t incY)
{
    for (size_t k=0; k<n; ++k) {
        size_t posX = k*incX;
        size_t posY = k*incY;

        y[posY] += alpha*x[posX];
    }
}

// Returns the dot product of the length-n vectors x and y, accumulated from
// the first to the last element. Element i of x is x[i*incX], element i of y
// is y[i*incY]. The result is 0 for n == 0.
double
ddot(size_t n,
     const double *x, ptrdiff_t incX,
     const double *y, ptrdiff_t incY)
{
    double acc = 0;
    size_t k   = 0;

    while (k<n) {
        acc += x[k*incX]*y[k*incY];
        ++k;
    }

    return acc;
}

// x <- alpha*x for a vector of length n whose element i is x[i*incX].
// - alpha == 1: x is left untouched.
// - alpha == 0: every element is set to exactly 0, so NaN or Inf entries
//   are cleared as well.
void
dscal(size_t n,
      double alpha,
      double *x, ptrdiff_t incX)
{
    if (alpha==1) {
        return;
    }

    if (alpha==0) {
        for (size_t k=0; k<n; ++k) {
            x[k*incX] = 0;
        }
        return;
    }

    for (size_t k=0; k<n; ++k) {
        x[k*incX] *= alpha;
    }
}

//-- BLAS Level 2 functions ----------------------------------------------------

// y <- beta*y + alpha*A*x, computed with one dot product per row of A.
// A is m x n with element (i,j) at A[i*incRowA + j*incColA]; x has n and y
// has m elements. For alpha == 0 neither A nor x is read.
void
dgemv_dot(size_t m, size_t n,
          double alpha,
          const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
          const double *x, ptrdiff_t incX,
          double beta,
          double *y, ptrdiff_t incY)
{
    dscal(m, beta, y, incY);

    if (alpha!=0) {
        for (size_t row=0; row<m; ++row) {
            const double *rowOfA = &A[row*incRowA];

            y[row*incY] += alpha*ddot(n, rowOfA, incColA, x, incX);
        }
    }
}

// Number of rows of A that are handled together in the fused part of
// dgemv_dotf. Can be overridden on the compiler command line.
#ifndef DOTF
#define DOTF 4
#endif

// y <- beta*y + alpha*A*x, computed with "fused" dot products: rows of A are
// processed in groups of DOTF so that each x[j] is used for DOTF rows in a
// row. The m % DOTF rows left over are handled with plain dot products.
// A is m x n with element (i,j) at A[i*incRowA + j*incColA]; x has n and y
// has m elements. For alpha == 0 neither A nor x is read.
void
dgemv_dotf(size_t m, size_t n,
           double alpha,
           const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
           const double *x, ptrdiff_t incX,
           double beta,
           double *y, ptrdiff_t incY)
{
    dscal(m, beta, y, incY);

    if (alpha!=0) {
        const size_t numGroups = m / DOTF;

        // Fused part: DOTF rows per group.
        for (size_t grp=0; grp<numGroups; ++grp) {
            for (size_t col=0; col<n; ++col) {
                for (size_t r=0; r<DOTF; ++r) {
                    const size_t row = DOTF*grp+r;

                    y[row*incY] += alpha*A[row*incRowA+col*incColA]*x[col*incX];
                }
            }
        }

        // Leftover rows: one dot product each.
        for (size_t row=numGroups*DOTF; row<m; ++row) {
            y[row*incY] += alpha*ddot(n, &A[row*incRowA], incColA, x, incX);
        }
    }
}


// y <- beta*y + alpha*A*x, computed with one axpy per column of A:
// y <- y + (alpha*x[j]) * A(:,j) for j = 0, ..., n-1.
// A is m x n with element (i,j) at A[i*incRowA + j*incColA]; x has n and y
// has m elements. For alpha == 0 neither A nor x is read.
void
dgemv_axpy(size_t m, size_t n,
           double alpha,
           const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
           const double *x, ptrdiff_t incX,
           double beta,
           double *y, ptrdiff_t incY)
{
    dscal(m, beta, y, incY);

    if (alpha!=0) {
        for (size_t col=0; col<n; ++col) {
            const double *colOfA = &A[col*incColA];

            daxpy(m, alpha*x[col*incX], colOfA, incRowA, y, incY);
        }
    }
}

// Number of columns of A that are handled together in the fused part of
// dgemv_axpyf. Can be overridden on the compiler command line.
#ifndef AXPYF
#define AXPYF 4
#endif

// y <- beta*y + alpha*A*x, computed with "fused" axpy operations: columns of
// A are processed in groups of AXPYF so that each y[i] is updated with AXPYF
// columns in a row. The n % AXPYF columns left over are handled with plain
// axpy operations.
// A is m x n with element (i,j) at A[i*incRowA + j*incColA]; x has n and y
// has m elements. For alpha == 0 neither A nor x is read.
void
dgemv_axpyf(size_t m, size_t n,
            double alpha,
            const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
            const double *x, ptrdiff_t incX,
            double beta,
            double *y, ptrdiff_t incY)
{
    dscal(m, beta, y, incY);

    if (alpha==0) {
        return;
    }

    // Fused part: AXPYF columns per group.
    size_t nb = n / AXPYF;
    for (size_t j=0; j<nb; ++j) {
        for (size_t i=0; i<m; ++i) {
            for (size_t l=0; l<AXPYF; ++l) {
                y[i*incY] += alpha*A[i*incRowA+(j*AXPYF+l)*incColA]
                                  *x[(j*AXPYF+l)*incX];
            }
        }
    }

    // Leftover columns. A column of A and the vector y both have m entries,
    // so the axpy length has to be m (not n); with n the result is wrong for
    // n < m and memory is accessed out of bounds for n > m.
    for (size_t j=nb*AXPYF; j<n; ++j) {
        daxpy(m, alpha*x[j*incX], &A[j*incColA], incRowA, y, incY);
    }
}


//-- BLAS Level 2: dgemv reference implementation and error bound --------------

// Reference implementation of y <- beta*y + alpha*A*x that does not rely on
// any other routine of this file.
// - A is m x n with element (i,j) at A[i*incRowA + j*incColA]; x has n and
//   y has m elements.
// - beta == 0: y is overwritten with zeros first (NaN/Inf in y are cleared).
// - alpha == 0: neither A nor x is read.
// - The update runs column by column through A.
void
dgemv_ref(size_t m, size_t n,
          double alpha,
          const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
          const double *x, ptrdiff_t incX,
          double beta,
          double *y, ptrdiff_t incY)
{
    // y <- beta*y
    if (beta==0) {
        for (size_t row=0; row<m; ++row) {
            y[row*incY] = 0;
        }
    } else if (beta!=1) {
        for (size_t row=0; row<m; ++row) {
            y[row*incY] *= beta;
        }
    }

    // y <- y + alpha*A*x
    if (alpha==0) {
        return;
    }
    for (size_t col=0; col<n; ++col) {
        for (size_t row=0; row<m; ++row) {
            y[row*incY] += alpha*A[row*incRowA+col*incColA]*x[col*incX];
        }
    }
}

// Error estimate for a computed result ySol of the gemv operation
// beta*y0 + alpha*A*x.
// - yRef is the trusted result that ySol is compared against.
// - Returns ||ySol - yRef||_inf divided by
//       DBL_EPSILON * (  max(m,n)*|alpha|*||A||_inf*||x||_inf
//                      + m*|beta|*||y0||_inf ).
// - Side effect: ySol gets overwritten with ySol - yRef.
double
dgemv_err(size_t m, size_t n,
          double alpha,
          const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
          const double *x, ptrdiff_t incX,
          const double *y0, ptrdiff_t incY0,
          double beta,
          const double *yRef, ptrdiff_t incYRef,
          double *ySol, ptrdiff_t incYSol)
{
    // Norms of the operands; vectors are treated as matrices with 1 column.
    double normY0 = dgenrm_inf(m, 1, y0, incY0, 1);
    double normX  = dgenrm_inf(n, 1, x, incX, 1);
    double normA  = dgenrm_inf(m, n, A, incRowA, incColA);

    size_t larger = n;
    if (!(m<n)) {
        larger = m;
    }

    // ySol <- ySol - yRef, then take the norm of the difference.
    daxpy(m, -1, yRef, incYRef, ySol, incYSol);
    double normDiff = dgenrm_inf(m, 1, ySol, incYSol, 1);

    double scaleAx = larger*fabs(alpha)*normA*normX;
    double scaleY  = m*fabs(beta)*normY0;

    return normDiff / (DBL_EPSILON*(scaleAx + scaleY));
}



//------------------------------------------------------------------------------

// Compile-time configuration of the benchmark in main(). Each value can be
// overridden on the compiler command line, e.g. with -DCOLMAJOR=1.

// Storage order of A in main(): non-zero selects column-major increments
// (incRowA = 1, incColA = m), zero selects row-major (incRowA = n,
// incColA = 1).
#ifndef COLMAJOR
#define COLMAJOR 0
#endif

// Seed handed to srand() at the start of main().
#ifndef SEED_RAND
#define SEED_RAND 0
#endif

// Largest number of rows used by the benchmark; also sizes the buffers for
// A, y0, yRef and ySol.
#ifndef MAX_M
#define MAX_M 4500
#endif

// Largest number of columns used by the benchmark; also sizes the buffers
// for A and x.
#ifndef MAX_N
#define MAX_N 4500
#endif

// Scalar alpha of the gemv operation. If it is 0, main() fills A and x with
// NaN.
#ifndef ALPHA
#define ALPHA 1
#endif

// Scalar beta of the gemv operation. If it is 0, main() fills y0 with NaN.
#ifndef BETA
#define BETA 1
#endif

// Benchmark driver.
//
// For m = n = 16, 32, ... (up to MAX_M and MAX_N) the reference gemv and the
// four variants dgemv_dot, dgemv_axpy, dgemv_dotf and dgemv_axpyf are timed.
// For each variant the error estimate from dgemv_err against the reference
// result is printed as well. Output goes to stdout as one line per problem
// size; lines starting with '#' are header lines.
//
// Each measurement repeats the operation until at least 0.1 seconds have
// been accumulated and at least 3 runs were done, and reports the average
// time per run. The copy that restores y before each run is not timed.
int
main()
{
    srand(SEED_RAND);
    printf("#COLMAJOR = %d\n", COLMAJOR);
    printf("#ALPHA    = %lf\n", (double)ALPHA);
    printf("#BETA     = %lf\n", (double)BETA);

    // The element count of A is computed in size_t: multiplying the two
    // macros as plain ints first would overflow for large MAX_M and MAX_N.
    double *A    = malloc((size_t)(MAX_M)*(size_t)(MAX_N)*sizeof(double));
    double *x    = malloc(MAX_N*sizeof(double));
    double *y0   = malloc(MAX_M*sizeof(double));
    double *yRef = malloc(MAX_M*sizeof(double));
    double *ySol = malloc(MAX_M*sizeof(double));
    if (!A || !x || !y0 || !yRef || !ySol) {
        abort();
    }

    // print header
    printf("#%4s %4s ", "m", "n");
    printf("%10s %10s ", "time ref", "mflops ref");
    printf("%10s %10s %7s ", "time 1", "mflops 1", "err");
    printf("%10s %10s %7s ", "time 2", "mflops 2", "err");
    printf("%10s %10s %7s ", "time 3", "mflops 3", "err");
    printf("%10s %10s %7s ", "time 4", "mflops 4", "err");
    printf("\n");

    for (size_t m=16, n=16; m<=MAX_M && n<=MAX_N; m+=16, n+=16) {
        // A is stored densely in the chosen storage order.
        ptrdiff_t incRowA = COLMAJOR ? 1 : n;
        ptrdiff_t incColA = COLMAJOR ? m : 1;

        ptrdiff_t incX    = 1;
        ptrdiff_t incY0   = 1;
        ptrdiff_t incYRef = 1;
        ptrdiff_t incYSol = 1;

        double    alpha = ALPHA;
        double    beta  = BETA;

        // Operation count used for the mflops columns, in units of 10^6.
        double    mflop = m*(2*n+1)/1000./1000.;

        // Operands that must not influence the result (A and x for
        // alpha == 0, y0 for beta == 0) are filled with NaN -- presumably
        // so that an implementation which uses them anyway shows up in the
        // error column.
        randGeMatrix(m, n, ALPHA==0, A, incRowA, incColA);
        randGeMatrix(n, 1, ALPHA==0, x, incX, 0);
        randGeMatrix(m, 1, BETA==0, y0, incY0, 0);

        //printf("A =\n");
        //printGeMatrix(m, n, A, incRowA, incColA);

        //printf("x =\n");
        //printGeMatrix(1, n, x, 0, incX);

        //printf("y0 =\n");
        //printGeMatrix(1, m, y0, 0, incY0);

        printf(" %4zu %4zu ", m,  n);

        // Reference implementation: produces yRef.
        {
            double t    = 0;
            size_t runs = 0;

            while (t<0.1 || runs<3) {
                dcopy(m, y0, incY0, yRef, incYRef);

                double t0 = walltime();
                dgemv_ref(m, n,
                          alpha,
                          A, incRowA, incColA,
                          x, incX,
                          beta,
                          yRef, incYRef);
                t += walltime() - t0;
                ++runs;
            }
            t /= runs;

            //printf("\nyRef =\n");
            //printGeMatrix(1, m, yRef, 0, incYRef);
            printf("%10.2lf %10.2lf ", t, mflop/t);
        }

        // Variant 1: dgemv_dot
        {
            double t    = 0;
            size_t runs = 0;

            while (t<0.1 || runs<3) {
                dcopy(m, y0, incY0, ySol, incYSol);

                double t0 = walltime();
                dgemv_dot(m, n,
                          alpha,
                          A, incRowA, incColA,
                          x, incX,
                          beta,
                          ySol, incYSol);
                t += walltime() - t0;
                ++runs;
            }
            t /= runs;

            //printf("\nySol =\n");
            //printGeMatrix(1, m, ySol, 0, incYSol);

            // Note: dgemv_err overwrites ySol with ySol - yRef.
            double err = dgemv_err(m, n, alpha,
                                   A, incRowA, incColA,
                                   x, incX,
                                   y0, incY0,
                                   beta,
                                   yRef, incYRef,
                                   ySol, incYSol);

            //printf("\nyDiff =\n");
            //printGeMatrix(1, m, ySol, 0, incYSol);

            printf("%10.2lf %10.2lf %7.1e ", t, mflop/t, err);
        }

        // Variant 2: dgemv_axpy
        {
            double t    = 0;
            size_t runs = 0;

            while (t<0.1 || runs<3) {
                dcopy(m, y0, incY0, ySol, incYSol);

                double t0 = walltime();
                dgemv_axpy(m, n,
                           alpha,
                           A, incRowA, incColA,
                           x, incX,
                           beta,
                           ySol, incYSol);
                t += walltime() - t0;
                ++runs;
            }
            t /= runs;

            //printf("\nySol =\n");
            //printGeMatrix(1, m, ySol, 0, incYSol);

            double err = dgemv_err(m, n, alpha,
                                   A, incRowA, incColA,
                                   x, incX,
                                   y0, incY0,
                                   beta,
                                   yRef, incYRef,
                                   ySol, incYSol);

            //printf("\nyDiff =\n");
            //printGeMatrix(1, m, ySol, 0, incYSol);

            printf("%10.2lf %10.2lf %7.1e ", t, mflop/t, err);
        }

        // Variant 3: dgemv_dotf
        {
            double t    = 0;
            size_t runs = 0;

            while (t<0.1 || runs<3) {
                dcopy(m, y0, incY0, ySol, incYSol);

                double t0 = walltime();
                dgemv_dotf(m, n,
                          alpha,
                          A, incRowA, incColA,
                          x, incX,
                          beta,
                          ySol, incYSol);
                t += walltime() - t0;
                ++runs;
            }
            t /= runs;

            //printf("\nySol =\n");
            //printGeMatrix(1, m, ySol, 0, incYSol);

            double err = dgemv_err(m, n, alpha,
                                   A, incRowA, incColA,
                                   x, incX,
                                   y0, incY0,
                                   beta,
                                   yRef, incYRef,
                                   ySol, incYSol);

            //printf("\nyDiff =\n");
            //printGeMatrix(1, m, ySol, 0, incYSol);

            printf("%10.2lf %10.2lf %7.1e ", t, mflop/t, err);
        }

        // Variant 4: dgemv_axpyf
        {
            double t    = 0;
            size_t runs = 0;

            while (t<0.1 || runs<3) {
                dcopy(m, y0, incY0, ySol, incYSol);

                double t0 = walltime();
                dgemv_axpyf(m, n,
                            alpha,
                            A, incRowA, incColA,
                            x, incX,
                            beta,
                            ySol, incYSol);
                t += walltime() - t0;
                ++runs;
            }
            t /= runs;

            //printf("\nySol =\n");
            //printGeMatrix(1, m, ySol, 0, incYSol);

            double err = dgemv_err(m, n, alpha,
                                   A, incRowA, incColA,
                                   x, incX,
                                   y0, incY0,
                                   beta,
                                   yRef, incYRef,
                                   ySol, incYSol);

            //printf("\nyDiff =\n");
            //printGeMatrix(1, m, ySol, 0, incYSol);

            printf("%10.2lf %10.2lf %7.1e ", t, mflop/t, err);
        }


        printf("\n");
        //break;
    }

    free(A);
    free(x);
    free(y0);
    free(yRef);
    free(ySol);
}

Test Run

heim$ gcc -Wall -std=c11 -O3 -DCOLMAJOR=1 -DMAX_M=3000 -o gemv gemv_sol.c
heim$ ./gemv | tee gemv_colmajor.dat
#COLMAJOR = 1
#ALPHA    = 1.000000
#BETA     = 1.000000
#   m    n   time ref mflops ref     time 1   mflops 1     err     time 2   mflops 2     err     time 3   mflops 3     err     time 4   mflops 4     err 
   16   16       0.00     898.33       0.00     930.24 6.6e-03       0.00    1067.20 0.0e+00       0.00    1154.03 0.0e+00       0.00     953.51 0.0e+00 
   32   32       0.00    2019.74       0.00    1381.72 1.2e-02       0.00    3250.35 0.0e+00       0.00    3200.51 0.0e+00       0.00    3052.42 0.0e+00 
   48   48       0.00    3270.47       0.00    1832.93 8.7e-03       0.00    4053.65 0.0e+00       0.00    4330.73 0.0e+00       0.00    5831.08 0.0e+00 
   64   64       0.00    2900.48       0.00    2268.67 3.3e-03       0.00    3050.59 0.0e+00       0.00    5552.24 0.0e+00       0.00    7321.72 0.0e+00 
   80   80       0.00    3945.66       0.00    2143.75 4.3e-03       0.00    3490.87 0.0e+00       0.00    5478.51 0.0e+00       0.00    6680.08 0.0e+00 
   96   96       0.00    3468.44       0.00    2917.23 2.9e-03       0.00    3766.56 0.0e+00       0.00    4986.07 0.0e+00       0.00    6853.00 0.0e+00 
  112  112       0.00    3740.44       0.00    2243.03 2.3e-03       0.00    3769.92 0.0e+00       0.00    6126.81 0.0e+00       0.00    8266.10 0.0e+00 
  128  128       0.00    3818.90       0.00    2129.69 2.1e-03       0.00    3704.39 0.0e+00       0.00    4519.31 0.0e+00       0.00    8993.11 0.0e+00 
  144  144       0.00    4279.26       0.00    2328.23 3.3e-03       0.00    3911.07 0.0e+00       0.00    5290.53 0.0e+00       0.00    8245.64 0.0e+00 
  160  160       0.00    4063.09       0.00    2072.14 1.9e-03       0.00    4094.33 0.0e+00       0.00    3972.46 0.0e+00       0.00    8805.44 0.0e+00 
  176  176       0.00    4491.29       0.00    2286.31 1.2e-03       0.00    4083.67 0.0e+00       0.00    5790.95 0.0e+00       0.00    8839.12 0.0e+00 
  192  192       0.00    4501.73       0.00    2050.27 2.3e-03       0.00    4002.43 0.0e+00       0.00    4167.74 0.0e+00       0.00    7871.81 0.0e+00 
  208  208       0.00    4211.90       0.00    2135.28 1.3e-03       0.00    4267.41 0.0e+00       0.00    4442.46 0.0e+00       0.00    7923.33 0.0e+00 
  224  224       0.00    4532.32       0.00    1911.95 1.7e-03       0.00    4212.12 0.0e+00       0.00    3628.96 0.0e+00       0.00    7718.75 0.0e+00 
  240  240       0.00    4347.89       0.00    2119.48 1.3e-03       0.00    4148.49 0.0e+00       0.00    4200.97 0.0e+00       0.00    9012.40 0.0e+00 
  256  256       0.00    4250.25       0.00    1504.30 1.3e-03       0.00    4269.47 0.0e+00       0.00    2744.76 0.0e+00       0.00    8203.22 0.0e+00 
  272  272       0.00    4341.95       0.00    1950.84 1.2e-03       0.00    4742.33 0.0e+00       0.00    4026.20 0.0e+00       0.00    7867.50 0.0e+00 
  288  288       0.00    4453.52       0.00    1824.91 1.2e-03       0.00    4378.74 0.0e+00       0.00    3518.40 0.0e+00       0.00    7820.85 0.0e+00 
  304  304       0.00    4387.72       0.00    1883.34 9.4e-04       0.00    4310.30 0.0e+00       0.00    3559.66 0.0e+00       0.00    8073.78 0.0e+00 
  320  320       0.00    4314.98       0.00    1823.70 8.4e-04       0.00    4801.67 0.0e+00       0.00    3322.94 0.0e+00       0.00    8092.92 0.0e+00 
  336  336       0.00    4414.02       0.00    1800.80 7.7e-04       0.00    4473.22 0.0e+00       0.00    3400.14 0.0e+00       0.00    8065.99 0.0e+00 
  352  352       0.00    4509.74       0.00    1818.34 9.9e-04       0.00    4444.55 0.0e+00       0.00    3519.36 0.0e+00       0.00    7965.94 0.0e+00 
  368  368       0.00    4524.38       0.00    1844.27 5.4e-04       0.00    4369.04 0.0e+00       0.00    3476.50 0.0e+00       0.00    8196.15 0.0e+00 
  384  384       0.00    4348.90       0.00    1516.75 6.9e-04       0.00    4359.64 0.0e+00       0.00    2772.83 0.0e+00       0.00    8217.28 0.0e+00 
  400  400       0.00    4363.27       0.00    1842.30 6.5e-04       0.00    4555.51 0.0e+00       0.00    3323.42 0.0e+00       0.00    8362.44 0.0e+00 
  416  416       0.00    4445.95       0.00    1738.94 5.8e-04       0.00    4567.87 0.0e+00       0.00    3274.69 0.0e+00       0.00    8036.30 0.0e+00 
  432  432       0.00    4538.51       0.00    1803.86 4.8e-04       0.00    4499.11 0.0e+00       0.00    3427.66 0.0e+00       0.00    8142.83 0.0e+00 
  448  448       0.00    4428.45       0.00    1724.33 7.3e-04       0.00    4409.46 0.0e+00       0.00    3086.98 0.0e+00       0.00    8161.70 0.0e+00 
  464  464       0.00    4843.50       0.00    1784.57 5.4e-04       0.00    4482.98 0.0e+00       0.00    3232.92 0.0e+00       0.00    8378.16 0.0e+00 
  480  480       0.00    4783.47       0.00    1780.54 4.5e-04       0.00    4512.16 0.0e+00       0.00    3090.58 0.0e+00       0.00    7957.08 0.0e+00 
  496  496       0.00    4423.80       0.00    1652.21 5.4e-04       0.00    4576.03 0.0e+00       0.00    3166.95 0.0e+00       0.00    7844.63 0.0e+00 
  512  512       0.00    4436.95       0.00     920.79 5.1e-04       0.00    4403.55 0.0e+00       0.00    1827.26 0.0e+00       0.00    7803.78 0.0e+00 
  528  528       0.00    4317.63       0.00     837.14 5.3e-04       0.00    4464.77 0.0e+00       0.00    2161.35 0.0e+00       0.00    7975.70 0.0e+00 
  544  544       0.00    4303.09       0.00     627.96 7.0e-04       0.00    4513.13 0.0e+00       0.00    1863.42 0.0e+00       0.00    7985.77 0.0e+00 
  560  560       0.00    4428.56       0.00     410.90 5.7e-04       0.00    4491.34 0.0e+00       0.00    1425.02 0.0e+00       0.00    7732.86 0.0e+00 
  576  576       0.00    4365.13       0.00     544.58 6.3e-04       0.00    4328.91 0.0e+00       0.00    1829.37 0.0e+00       0.00    7625.40 0.0e+00 
  592  592       0.00    4314.35       0.00     478.31 4.7e-04       0.00    4821.36 0.0e+00       0.00    1606.48 0.0e+00       0.00    7864.04 0.0e+00 
  608  608       0.00    4231.09       0.00     437.23 4.3e-04       0.00    4358.22 0.0e+00       0.00    1432.79 0.0e+00       0.00    7944.22 0.0e+00 
  624  624       0.00    4247.60       0.00     340.09 4.6e-04       0.00    4414.10 0.0e+00       0.00    1190.32 0.0e+00       0.00    7700.23 0.0e+00 
  640  640       0.00    4263.17       0.00     342.84 7.3e-04       0.00    4230.37 0.0e+00       0.00    1259.57 0.0e+00       0.00    7393.47 0.0e+00 
  656  656       0.00    4283.15       0.00     387.60 6.9e-04       0.00    4745.13 0.0e+00       0.00    1326.45 0.0e+00       0.00    7596.91 0.0e+00 
  672  672       0.00    4091.93       0.00     410.84 4.2e-04       0.00    4190.53 0.0e+00       0.00    1382.88 0.0e+00       0.00    7633.34 0.0e+00 
  688  688       0.00    4056.49       0.00     312.63 3.3e-04       0.00    4297.64 0.0e+00       0.00    1076.56 0.0e+00       0.00    7406.76 0.0e+00 
  704  704       0.00    4418.62       0.00     342.67 4.8e-04       0.00    4283.36 0.0e+00       0.00    1220.08 0.0e+00       0.00    7015.69 0.0e+00 
  720  720       0.00    4131.22       0.00     332.01 3.5e-04       0.00    4131.22 0.0e+00       0.00    1188.43 0.0e+00       0.00    6904.22 0.0e+00 
  736  736       0.00    3967.91       0.00     364.66 3.3e-04       0.00    4050.70 0.0e+00       0.00    1235.91 0.0e+00       0.00    6948.27 0.0e+00 
  752  752       0.00    4289.37       0.00     329.24 3.2e-04       0.00    4074.34 0.0e+00       0.00    1090.61 0.0e+00       0.00    6821.43 0.0e+00 
  768  768       0.00    4260.23       0.00     300.47 3.5e-04       0.00    4120.72 0.0e+00       0.00    1062.37 0.0e+00       0.00    6342.05 0.0e+00 
  784  784       0.00    3958.67       0.00     307.52 2.9e-04       0.00    3981.04 0.0e+00       0.00    1084.72 0.0e+00       0.00    6407.68 0.0e+00 
  800  800       0.00    3803.98       0.00     326.02 3.7e-04       0.00    3842.40 0.0e+00       0.00    1075.87 0.0e+00       0.00    6415.64 0.0e+00 
  816  816       0.00    3930.96       0.00     314.96 3.3e-04       0.00    3784.38 0.0e+00       0.00    1017.57 0.0e+00       0.00    6311.34 0.0e+00 
  832  832       0.00    3948.05       0.00     304.76 3.5e-04       0.00    3941.75 0.0e+00       0.00    1020.07 0.0e+00       0.00    6012.12 0.0e+00 
  848  848       0.00    3754.63       0.00     300.89 3.7e-04       0.00    3885.45 0.0e+00       0.00    1059.67 0.0e+00       0.00    5808.55 0.0e+00 
  864  864       0.00    4078.23       0.00     313.71 3.6e-04       0.00    4003.53 0.0e+00       0.00    1045.70 0.0e+00       0.00    5751.35 0.0e+00 
  880  880       0.00    3874.20       0.00     309.94 3.0e-04       0.00    3775.58 0.0e+00       0.00    1022.79 0.0e+00       0.00    5790.17 0.0e+00 
  896  896       0.00    3727.14       0.01     305.24 3.7e-04       0.00    3841.06 0.0e+00       0.00    1007.73 0.0e+00       0.00    5739.69 0.0e+00 
  912  912       0.00    3888.64       0.01     302.62 2.5e-04       0.00    3812.99 0.0e+00       0.00     998.64 0.0e+00       0.00    5295.82 0.0e+00 
  928  928       0.00    3894.65       0.01     310.19 2.2e-04       0.00    3894.65 0.0e+00       0.00    1018.31 0.0e+00       0.00    5138.56 0.0e+00 
  944  944       0.00    3793.39       0.01     308.01 2.8e-04       0.00    3673.42 0.0e+00       0.00     972.66 0.0e+00       0.00    5333.44 0.0e+00 
  960  960       0.00    3577.67       0.01     313.51 2.8e-04       0.00    3928.06 0.0e+00       0.00     955.61 0.0e+00       0.00    5145.21 0.0e+00 
  976  976       0.00    3812.26       0.01     311.91 2.4e-04       0.00    3926.62 0.0e+00       0.00     972.13 0.0e+00       0.00    5163.87 0.0e+00 
  992  992       0.00    3741.33       0.01     295.37 3.0e-04       0.00    3580.22 0.0e+00       0.00     966.66 0.0e+00       0.00    4958.60 0.0e+00 
 1008 1008       0.00    3696.61       0.01     332.69 2.1e-04       0.00    3720.64 0.0e+00       0.00     975.91 0.0e+00       0.00    5304.64 0.0e+00 
 1024 1024       0.00    3776.72       0.01     305.19 2.9e-04       0.00    3566.90 0.0e+00       0.00     839.27 0.0e+00       0.00    5140.53 0.0e+00 
 1040 1040       0.00    3657.57       0.01     314.80 1.7e-04       0.00    3419.50 0.0e+00       0.00     983.75 0.0e+00       0.00    5367.32 0.0e+00 
 1056 1056       0.00    3570.12       0.01     290.07 2.4e-04       0.00    3837.88 0.0e+00       0.00     953.39 0.0e+00       0.00    5132.05 0.0e+00 
 1072 1072       0.00    3532.78       0.01     313.56 2.1e-04       0.00    3762.72 0.0e+00       0.00    1011.75 0.0e+00       0.00    4661.59 0.0e+00 
 1088 1088       0.00    3694.98       0.01     301.46 2.3e-04       0.00    3766.04 0.0e+00       0.00    1012.03 0.0e+00       0.00    4974.01 0.0e+00 
 1104 1104       0.00    3536.17       0.01     317.04 1.7e-04       0.00    3536.17 0.0e+00       0.00     931.15 0.0e+00       0.00    5143.52 0.0e+00 
 1120 1120       0.00    3650.79       0.01     301.19 3.3e-04       0.00    3739.78 0.0e+00       0.00     953.77 0.0e+00       0.00    4517.86 0.0e+00 
 1136 1136       0.00    3849.72       0.01     305.16 2.3e-04       0.00    3544.56 0.0e+00       0.00     938.96 0.0e+00       0.00    4570.37 0.0e+00 
 1152 1152       0.00    3558.18       0.01     313.82 2.5e-04       0.00    3531.63 0.0e+00       0.00     902.82 0.0e+00       0.00    4538.25 0.0e+00 
 1168 1168       0.00    3712.28       0.01     297.78 2.2e-04       0.00    3712.28 0.0e+00       0.00     982.66 0.0e+00       0.00    4838.86 0.0e+00 
 1184 1184       0.00    3646.36       0.01     280.49 2.3e-04       0.00    3722.86 0.0e+00       0.00     892.47 0.0e+00       0.00    4793.82 0.0e+00 
 1200 1200       0.00    3588.40       0.01     288.12 2.4e-04       0.00    3860.81 0.0e+00       0.00     950.80 0.0e+00       0.00    4845.65 0.0e+00 
 1216 1216       0.00    3523.34       0.01     295.85 2.2e-04       0.00    3461.48 0.0e+00       0.00     887.56 0.0e+00       0.00    4615.30 0.0e+00 
 1232 1232       0.00    3583.52       0.01     303.69 2.0e-04       0.00    3462.04 0.0e+00       0.00     880.70 0.0e+00       0.00    4737.53 0.0e+00 
 1248 1248       0.00    3796.17       0.01     283.30 2.0e-04       0.00    3552.53 0.0e+00       0.00     934.88 0.0e+00       0.00    4487.41 0.0e+00 
 1264 1264       0.00    3516.32       0.01     290.61 3.6e-04       0.00    3580.25 0.0e+00       0.00     990.96 0.0e+00       0.00    4533.44 0.0e+00 
 1280 1280       0.00    3605.89       0.01     295.03 1.8e-04       0.00    3441.98 0.0e+00       0.00     917.86 0.0e+00       0.00    4499.91 0.0e+00 
 1296 1296       0.00    3574.38       0.01     274.95 2.1e-04       0.00    3696.58 0.0e+00       0.00     974.55 0.0e+00       0.00    4490.89 0.0e+00 
 1312 1312       0.00    3581.76       0.01     275.52 2.2e-04       0.00    3581.76 0.0e+00       0.00     895.44 0.0e+00       0.00    4915.53 0.0e+00 
 1328 1328       0.00    3704.92       0.01     288.70 2.1e-04       0.00    3457.93 0.0e+00       0.00     952.69 0.0e+00       0.00    4426.66 0.0e+00 
 1344 1344       0.00    3469.46       0.01     289.12 1.7e-04       0.00    3745.43 0.0e+00       0.00     867.36 0.0e+00       0.00    4435.38 0.0e+00 
 1360 1360       0.00    3465.07       0.01     296.04 2.9e-04       0.00    3633.28 0.0e+00       0.00     925.14 0.0e+00       0.00    4474.31 0.0e+00 
 1376 1376       0.00    3409.32       0.01     265.17 2.1e-04       0.00    3636.60 0.0e+00       0.00     871.27 0.0e+00       0.00    4304.69 0.0e+00 
 1392 1392       0.00    3721.65       0.01     281.94 1.7e-04       0.00    3418.56 0.0e+00       0.00     930.41 0.0e+00       0.00    4334.88 0.0e+00 
 1408 1408       0.00    3331.72       0.01     277.64 1.8e-04       0.00    3450.71 0.0e+00       0.00     872.59 0.0e+00       0.00    4254.80 0.0e+00 
 1424 1424       0.00    3691.85       0.01     283.99 1.8e-04       0.00    3448.43 0.0e+00       0.00     933.10 0.0e+00       0.00    4178.69 0.0e+00 
 1440 1440       0.00    3432.06       0.02     276.58 1.5e-04       0.00    3696.06 0.0e+00       0.01     829.73 0.0e+00       0.00    4261.78 0.0e+00 
 1456 1456       0.00    3605.13       0.02     269.90 1.6e-04       0.00    3508.74 0.0e+00       0.00     933.09 0.0e+00       0.00    4125.66 0.0e+00 
 1472 1472       0.00    3598.08       0.02     275.87 2.0e-04       0.00    3598.08 0.0e+00       0.01     867.01 0.0e+00       0.00    4031.59 0.0e+00 
 1488 1488       0.00    3463.28       0.02     281.89 2.5e-04       0.00    3721.01 0.0e+00       0.01     841.66 0.0e+00       0.00    4208.29 0.0e+00 
 1504 1504       0.00    3529.92       0.02     263.99 1.3e-04       0.00    3661.57 0.0e+00       0.01     859.85 0.0e+00       0.00    4344.51 0.0e+00 
 1520 1520       0.00    3655.83       0.02     277.34 1.3e-04       0.00    3697.86 0.0e+00       0.01     832.02 0.0e+00       0.00    3992.00 0.0e+00 
 1536 1536       0.00    3561.55       0.02     257.46 1.8e-04       0.00    3681.70 0.0e+00       0.01     613.62 0.0e+00       0.00    3823.30 0.0e+00 
 1552 1552       0.00    3614.22       0.02     262.85 1.5e-04       0.00    3421.46 0.0e+00       0.01     876.17 0.0e+00       0.00    4144.31 0.0e+00 
 1568 1568       0.00    3639.92       0.02     268.30 1.7e-04       0.00    3639.92 0.0e+00       0.01     885.39 0.0e+00       0.00    4292.78 0.0e+00 
 1584 1584       0.00    3513.79       0.02     273.80 2.2e-04       0.00    3664.38 0.0e+00       0.01     912.67 0.0e+00       0.00    4266.74 0.0e+00 
 1600 1600       0.00    3636.34       0.02     279.36 2.3e-04       0.00    3226.61 0.0e+00       0.01     921.89 0.0e+00       0.00    4236.96 0.0e+00 
 1616 1616       0.00    3448.19       0.02     284.97 1.5e-04       0.00    3291.45 0.0e+00       0.01     949.91 0.0e+00       0.00    4179.62 0.0e+00 
 1632 1632       0.00    3463.51       0.02     266.42 2.1e-04       0.00    3536.17 0.0e+00       0.01     905.84 0.0e+00       0.00    3972.14 0.0e+00 
 1648 1648       0.00    3314.41       0.02     271.67 1.3e-04       0.00    3477.41 0.0e+00       0.01     923.69 0.0e+00       0.00    4020.76 0.0e+00 
 1664 1664       0.00    3489.86       0.02     276.97 1.8e-04       0.00    3600.65 0.0e+00       0.01     886.31 0.0e+00       0.00    4280.49 0.0e+00 
 1680 1680       0.00    3593.21       0.02     260.61 1.3e-04       0.00    3557.28 0.0e+00       0.01     923.97 0.0e+00       0.00    4065.47 0.0e+00 
 1696 1696       0.00    3348.09       0.02     261.57 1.5e-04       0.00    3337.63 0.0e+00       0.01     889.34 0.0e+00       0.00    4028.17 0.0e+00 
 1712 1712       0.00    3459.52       0.02     266.53 1.9e-04       0.00    3459.52 0.0e+00       0.01     879.54 0.0e+00       0.00    3997.91 0.0e+00 
 1728 1728       0.00    3584.22       0.02     271.53 1.2e-04       0.00    3692.83 0.0e+00       0.01     896.05 0.0e+00       0.00    4181.59 0.0e+00 
 1744 1744       0.00    3484.94       0.02     276.58 1.2e-04       0.00    3650.89 0.0e+00       0.01     851.87 0.0e+00       0.00    3982.79 0.0e+00 
 1760 1760       0.00    3532.27       0.02     258.21 1.2e-04       0.00    3492.83 0.0e+00       0.01     845.04 0.0e+00       0.00    3842.12 0.0e+00 
 1776 1776       0.00    3384.52       0.02     262.92 1.1e-04       0.00    3441.89 0.0e+00       0.01     860.47 0.0e+00       0.00    4302.36 0.0e+00 
 1792 1792       0.00    3340.65       0.02     267.68 1.1e-04       0.00    3328.97 0.0e+00       0.01     835.16 0.0e+00       0.00    4175.81 0.0e+00 
 1808 1808       0.00    3662.14       0.02     272.48 1.3e-04       0.00    3531.35 0.0e+00       0.01     832.30 0.0e+00       0.00    3923.72 0.0e+00 
 1824 1824       0.00    3327.89       0.03     255.99 1.4e-04       0.00    3751.44 0.0e+00       0.01     907.61 0.0e+00       0.00    4193.14 0.0e+00 
 1840 1840       0.00    3521.98       0.03     270.92 1.3e-04       0.00    3589.71 0.0e+00       0.01     880.50 0.0e+00       0.00    3928.36 0.0e+00 
 1856 1856       0.00    3570.96       0.03     265.05 1.8e-04       0.00    3633.61 0.0e+00       0.01     939.73 0.0e+00       0.00    4072.15 0.0e+00 
 1872 1872       0.00    3435.21       0.03     254.93 1.2e-04       0.00    3295.00 0.0e+00       0.01     828.53 0.0e+00       0.00    4136.28 0.0e+00 
 1888 1888       0.00    3565.49       0.03     259.31 1.6e-04       0.00    3565.49 0.0e+00       0.01     907.58 0.0e+00       0.00    4084.10 0.0e+00 
 1904 1904       0.00    3692.10       0.03     263.72 1.2e-04       0.00    3428.38 0.0e+00       0.01     797.76 0.0e+00       0.00    4278.88 0.0e+00 
 1920 1920       0.00    3419.19       0.03     268.17 1.8e-04       0.00    3419.19 0.0e+00       0.01     811.22 0.0e+00       0.00    4277.34 0.0e+00 
 1936 1936       0.00    3544.57       0.03     272.66 1.0e-04       0.00    3524.12 0.0e+00       0.01     824.79 0.0e+00       0.00    4048.99 0.0e+00 
 1952 1952       0.00    3658.83       0.03     254.09 1.2e-04       0.00    3430.15 0.0e+00       0.01     831.55 0.0e+00       0.00    4227.06 0.0e+00 
 1968 1968       0.00    3521.83       0.03     258.27 1.2e-04       0.00    3486.61 0.0e+00       0.01     852.28 0.0e+00       0.00    3944.44 0.0e+00 
 1984 1984       0.00    3464.78       0.03     262.48 1.2e-04       0.00    3543.52 0.0e+00       0.01     866.19 0.0e+00       0.00    3937.25 0.0e+00 
 2000 2000       0.00    3273.55       0.03     246.22 1.5e-04       0.00    3564.53 0.0e+00       0.01     872.95 0.0e+00       0.00    4161.04 0.0e+00 
 2016 2016       0.00    3496.13       0.03     271.02 1.4e-04       0.00    3547.87 0.0e+00       0.01     813.05 0.0e+00       0.00    3991.35 0.0e+00 
 2032 2032       0.00    3634.44       0.03     247.80 1.2e-04       0.00    3469.23 0.0e+00       0.01     826.01 0.0e+00       0.00    3799.64 0.0e+00 
 2048 2048       0.00    3661.38       0.04     228.84 9.5e-05       0.00    3432.54 0.0e+00       0.02     533.95 0.0e+00       0.00    3943.61 0.0e+00 
 2064 2064       0.00    3408.90       0.03     255.67 1.2e-04       0.00    3238.46 0.0e+00       0.01     852.23 0.0e+00       0.00    4175.91 0.0e+00 
 2080 2080       0.00    3461.95       0.03     266.30 1.3e-04       0.00    3461.95 0.0e+00       0.01     778.94 0.0e+00       0.00    4091.40 0.0e+00 
 2096 2096       0.00    3339.64       0.04     251.10 1.1e-04       0.00    3515.41 0.0e+00       0.01     790.97 0.0e+00       0.00    4394.26 0.0e+00 
 2112 2112       0.00    3407.04       0.04     254.95 1.1e-04       0.00    3212.35 0.0e+00       0.01     811.20 0.0e+00       0.00    3974.88 0.0e+00 
 2128 2128       0.00    3261.20       0.04     258.83 1.1e-04       0.00    3458.85 0.0e+00       0.01     823.54 0.0e+00       0.00    4364.74 0.0e+00 
 2144 2144       0.00    3260.26       0.04     250.79 1.5e-04       0.00    3402.38 0.0e+00       0.01     919.56 0.0e+00       0.00    3954.11 0.0e+00 
 2160 2160       0.00    3309.10       0.04     254.55 1.0e-04       0.00    3360.01 0.0e+00       0.01     840.00 0.0e+00       0.00    4200.01 0.0e+00 
 2176 2176       0.00    3599.41       0.04     258.33 1.4e-04       0.00    3409.97 0.0e+00       0.01     852.49 0.0e+00       0.00    4391.62 0.0e+00 
 2192 2192       0.00    3171.93       0.04     240.30 8.9e-05       0.00    3582.62 0.0e+00       0.01     786.43 0.0e+00       0.00    4133.13 0.0e+00 
 2208 2208       0.00    3457.79       0.04     265.98 1.6e-04       0.00    3457.79 0.0e+00       0.01     877.75 0.0e+00       0.00    3901.09 0.0e+00 
 2224 2224       0.00    3562.05       0.04     247.36 1.1e-04       0.00    3418.13 0.0e+00       0.01     791.57 0.0e+00       0.00    4056.78 0.0e+00 
 2240 2240       0.00    3558.73       0.04     250.94 1.2e-04       0.00    3284.98 0.0e+00       0.01     903.37 0.0e+00       0.00    4197.47 0.0e+00 
 2256 2256       0.00    3461.65       0.04     254.53 1.3e-04       0.00    3424.63 0.0e+00       0.01     814.51 0.0e+00       0.00    3979.97 0.0e+00 
 2272 2272       0.00    3510.92       0.04     258.16 1.1e-04       0.00    3614.18 0.0e+00       0.01     826.10 0.0e+00       0.00    4233.76 0.0e+00 
 2288 2288       0.00    3332.06       0.04     241.67 8.3e-05       0.00    3246.37 0.0e+00       0.01     761.61 0.0e+00       0.00    3998.47 0.0e+00 
 2304 2304       0.00    3282.28       0.04     245.06 1.3e-04       0.00    3185.74 0.0e+00       0.01     743.34 0.0e+00       0.00    4054.58 0.0e+00 
 2320 2320       0.00    3445.48       0.04     248.47 1.0e-04       0.00    3328.02 0.0e+00       0.01     807.53 0.0e+00       0.00    4306.85 0.0e+00 
 2336 2336       0.00    3165.68       0.04     251.91 9.0e-05       0.00    3572.55 0.0e+00       0.01     893.14 0.0e+00       0.00    4167.98 0.0e+00 
 2352 2352       0.00    3319.85       0.04     255.37 1.0e-04       0.00    3319.85 0.0e+00       0.01     829.96 0.0e+00       0.00    4315.80 0.0e+00 
 2368 2368       0.00    3477.34       0.04     258.86 8.3e-05       0.00    3365.16 0.0e+00       0.01     917.77 0.0e+00       0.00    4038.20 0.0e+00 
 2384 2384       0.00    3297.10       0.05     243.63 1.2e-04       0.00    3410.79 0.0e+00       0.01     826.86 0.0e+00       0.00    4030.93 0.0e+00 
 2400 2400       0.00    3456.72       0.05     246.91 7.7e-05       0.00    3561.47 0.0e+00       0.01     806.57 0.0e+00       0.00    4189.96 0.0e+00 
 2416 2416       0.00    3609.11       0.05     233.53 1.0e-04       0.00    3821.41 0.0e+00       0.01     778.44 0.0e+00       0.00    4320.32 0.0e+00 
 2432 2432       0.00    3549.50       0.05     253.54 1.1e-04       0.00    3431.19 0.0e+00       0.01     860.49 0.0e+00       0.00    4087.31 0.0e+00 
 2448 2448       0.00    3378.40       0.05     239.76 8.7e-05       0.00    3378.40 0.0e+00       0.02     799.19 0.0e+00       0.00    4315.63 0.0e+00 
 2464 2464       0.00    3157.71       0.05     260.25 9.1e-05       0.00    3400.62 0.0e+00       0.01     850.15 0.0e+00       0.00    4085.16 0.0e+00 
 2480 2480       0.00    3444.92       0.05     246.07 1.2e-04       0.00    3198.85 0.0e+00       0.02     782.94 0.0e+00       0.00    4306.15 0.0e+00 
 2496 2496       0.00    3285.58       0.05     267.05 1.1e-04       0.00    3364.88 0.0e+00       0.01     830.84 0.0e+00       0.00    4418.53 0.0e+00 
 2512 2512       0.00    3534.38       0.05     236.68 1.2e-04       0.00    3213.08 0.0e+00       0.02     757.37 0.0e+00       0.00    4291.75 0.0e+00 
 2528 2528       0.00    3323.86       0.05     255.68 1.3e-04       0.00    3137.91 0.0e+00       0.02     745.74 0.0e+00       0.00    4090.91 0.0e+00 
 2544 2544       0.00    3236.60       0.05     242.75 1.3e-04       0.00    3236.60 0.0e+00       0.02     823.86 0.0e+00       0.00    4401.78 0.0e+00 
 2560 2560       0.00    3408.54       0.06     231.35 9.3e-05       0.00    3277.44 0.0e+00       0.02     546.24 0.0e+00       0.00    4171.29 0.0e+00 
 2576 2576       0.00    3499.54       0.06     234.25 7.2e-05       0.00    3499.54 0.0e+00       0.02     796.45 0.0e+00       0.00    4114.98 0.0e+00 
 2592 2592       0.00    3359.88       0.05     251.99 8.2e-05       0.00    3225.48 0.0e+00       0.02     855.24 0.0e+00       0.00    4154.03 0.0e+00 
 2608 2608       0.00    3463.33       0.06     240.10 9.1e-05       0.00    3129.37 0.0e+00       0.02     793.68 0.0e+00       0.00    4081.78 0.0e+00 
 2624 2624       0.00    3030.14       0.05     258.25 1.1e-04       0.00    3167.88 0.0e+00       0.02     826.40 0.0e+00       0.00    4269.75 0.0e+00 
 2640 2640       0.00    3295.34       0.06     246.03 1.1e-04       0.00    3346.04 0.0e+00       0.02     760.46 0.0e+00       0.00    4055.81 0.0e+00 
 2656 2656       0.00    3245.61       0.06     249.02 8.7e-05       0.00    3386.72 0.0e+00       0.02     846.68 0.0e+00       0.00    4233.40 0.0e+00 
 2672 2672       0.00    3427.64       0.06     238.03 9.1e-05       0.00    3427.64 0.0e+00       0.02     779.01 0.0e+00       0.00    4284.55 0.0e+00 
 2688 2688       0.00    3547.65       0.06     255.06 9.4e-05       0.00    3324.28 0.0e+00       0.02     788.37 0.0e+00       0.00    4204.62 0.0e+00 
 2704 2704       0.00    3363.97       0.06     230.94 8.8e-05       0.00    3510.22 0.0e+00       0.02     731.30 0.0e+00       0.00    3802.74 0.0e+00 
 2720 2720       0.00    3498.07       0.06     233.68 9.1e-05       0.00    3255.89 0.0e+00       0.02     739.98 0.0e+00       0.00    3995.87 0.0e+00 
 2736 2736       0.00    3675.47       0.06     236.43 9.0e-05       0.00    3675.47 0.0e+00       0.02     816.77 0.0e+00       0.00    3893.27 0.0e+00 
 2752 2752       0.00    3443.13       0.06     239.21 1.1e-04       0.00    3305.40 0.0e+00       0.02     883.74 0.0e+00       0.00    4241.93 0.0e+00 
 2768 2768       0.00    3371.81       0.07     229.90 7.2e-05       0.00    3483.28 0.0e+00       0.02     766.32 0.0e+00       0.00    3984.87 0.0e+00 
 2784 2784       0.00    3523.66       0.06     244.80 9.6e-05       0.00    3523.66 0.0e+00       0.02     775.20 0.0e+00       0.00    3876.02 0.0e+00 
 2800 2800       0.00    3279.13       0.07     235.24 9.4e-05       0.00    3450.22 0.0e+00       0.02     784.14 0.0e+00       0.00    4234.36 0.0e+00 
 2816 2816       0.00    3316.71       0.07     237.94 8.1e-05       0.01    3172.51 0.0e+00       0.02     721.02 0.0e+00       0.00    4282.88 0.0e+00 
 2832 2832       0.00    3529.52       0.07     229.19 7.3e-05       0.00    3529.52 0.0e+00       0.02     729.24 0.0e+00       0.00    4083.74 0.0e+00 
 2848 2848       0.01    3245.01       0.07     243.38 9.9e-05       0.01    3245.01 0.0e+00       0.02     811.25 0.0e+00       0.00    4277.51 0.0e+00 
 2864 2864       0.00    3281.57       0.07     234.40 1.2e-04       0.00    3445.65 0.0e+00       0.02     820.39 0.0e+00       0.00    4266.04 0.0e+00 
 2880 2880       0.00    3318.34       0.07     248.88 8.9e-05       0.00    3469.17 0.0e+00       0.02     829.58 0.0e+00       0.00    3982.00 0.0e+00 
 2896 2896       0.01    3355.31       0.07     228.77 8.1e-05       0.00    3523.07 0.0e+00       0.02     838.83 0.0e+00       0.00    4026.37 0.0e+00 
 2912 2912       0.01    3238.28       0.07     242.32 1.1e-04       0.01    3392.48 0.0e+00       0.02     848.12 0.0e+00       0.00    4070.98 0.0e+00 
 2928 2928       0.01    3258.37       0.07     233.85 7.9e-05       0.01    3429.86 0.0e+00       0.02     779.51 0.0e+00       0.00    4053.47 0.0e+00 
 2944 2944       0.00    3467.44       0.07     236.42 9.2e-05       0.00    3467.44 0.0e+00       0.02     866.86 0.0e+00       0.00    4160.93 0.0e+00 
 2960 2960       0.01    3154.71       0.08     228.60 1.0e-04       0.01    3345.90 0.0e+00       0.02     730.26 0.0e+00       0.00    4301.88 0.0e+00 
 2976 2976       0.01    3382.17       0.08     231.08 9.7e-05       0.01    3366.06 0.0e+00       0.02     805.28 0.0e+00       0.00    4187.45 0.0e+00 
 2992 2992       0.01    3418.63       0.08     223.84 8.6e-05       0.01    3418.63 0.0e+00       0.02     813.96 0.0e+00       0.00    4297.71 0.0e+00 
heim$ 

Using the gnuplot script

# Plots the MFLOPS rates of the column-major GEMV benchmark over the matrix
# dimension.  Columns read from the data file: 1 = m, 4 = MFLOPS of the
# reference implementation, 6/9/12/15 = MFLOPS of variants 1 to 4.
datafile = "gemv_colmajor.dat"

# Output: SVG image
set terminal svg size 900, 500
set output "bench.gemv.svg"

# Title, axis labels and fixed MFLOPS range
set title "GEMV (col major)"
set xlabel "Matrix dimension M=N"
set ylabel "MFLOPS"
set yrange [0:12000]

# Legend and point markers
set key outside
set pointsize 0.5

# One curve per implementation
plot datafile using 1:4  with linespoints lt 2 lw 3 title "dgemv_ref", \
     datafile using 1:6  with linespoints lt 3 lw 3 title "dgemv_dot", \
     datafile using 1:9  with linespoints lt 4 lw 3 title "dgemv_axpy", \
     datafile using 1:12 with linespoints lt 7 lw 3 title "dgemv_dotf", \
     datafile using 1:15 with linespoints lt 8 lw 3 title "dgemv_axpyf"

and running it through gnuplot

heim$ gnuplot gemv.plot
heim$ 

gives

Exercise (Gnuplot)

Use gnuplot to visualize the benchmark results for the row-major case as well.

Exercise (Fuse Factor)

Find the best fuse factor for axpyf and dotf.