Sample solution

Content

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>

/* return real time in seconds since start of the process */
double walltime() {
   static clock_t ticks_per_second = 0;
   if (!ticks_per_second) {
      ticks_per_second = sysconf(_SC_CLK_TCK);
   }
   struct tms timebuf;
   /* times returns the number of real time ticks passed since start */
   return (double) times(&timebuf) / ticks_per_second;
}

void init_vector(double* v, size_t len, ptrdiff_t incr) {
   for (size_t i = 0; i < len; ++i) {
      v[i*incr] = i + 1;
   }
}

void print_vector(double* v, size_t len, ptrdiff_t incr) {
   for (size_t i = 0; i < len; ++i) {
      printf(" %4.1lf", v[i*incr]);
   }
   printf("\n");
}

#define MAX_LEN 134217728

int main() {
   printf("     len t1 (separate) t2 (interleaving)      t2/t1\n");
   for (size_t len = 8192; len <= MAX_LEN/2; len *= 2) {
      printf("%8zu", len);

      /* separate vectors */
      double* vector1 = malloc(sizeof(double) * len * 2);
      double* x = vector1;
      double* y = vector1 + len;
      double t0 = walltime();
      init_vector(x, len, 1);
      init_vector(y, len, 1);
      double t1 = walltime() - t0;
      printf(" %12.2lf", t1);

      /* interleaved vectors */
      double* vector2 = malloc(sizeof(double) * len * 2);
      x = vector2; y = vector2 + 1;
      t0 = walltime();
      init_vector(x, len, 2);
      init_vector(y, len, 2);
      double t2 = walltime() - t0;
      printf(" %12.2lf %16.2lf", t2, t2/t1);
      printf("\n");

      free(vector1); free(vector2);
   }
}

Compilation and Execution

theon$ gcc -Wall -o vector3 vector3.c
theon$ ./vector3
     len t1 (separate) t2 (interleaving)      t2/t1
    8192         0.00         0.00             -NaN
   16384         0.00         0.00             -NaN
   32768         0.00         0.01              Inf
   65536         0.00         0.00             -NaN
  131072         0.00         0.01              Inf
  262144         0.00         0.01              Inf
  524288         0.01         0.01             1.00
 1048576         0.01         0.03             3.00
 2097152         0.02         0.05             2.50
 4194304         0.04         0.11             2.75
 8388608         0.06         0.17             2.83
16777216         0.11         0.35             3.18
33554432         0.23         0.75             3.26
67108864         0.44         5.78            13.14
theon$ 

Limits of the granularity

Very short time periods cannot be measured properly due to the insufficient granularity of the system clock:

theon$ gcc -Wall -o ticks ticks.c
theon$ ./ticks
ticks per second: 100
theon$ 

Hence, the precision of our timer is very limited. It is even worse as the system checks every 0.01 seconds what is actually running and accounts that tick accordingly. As we cannot control when the ticks occur, a computation that requires 0.001s might be hit by a tick while another computation taking 0.009s might complete between two ticks.

To achieve more precise measurements we need to execute computations repeatedly until some minimal amount of time has been spent.

Following variant tries to do this. It is important here that storage allocation and deallocation is done outside the loops of which the time is measured. And care should also be taken that each run gets its own storage as we otherwise run the test with possibly still cached memory sections.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>

/* return real time in seconds since start of the process */
double walltime() {
   static clock_t ticks_per_second = 0;
   if (!ticks_per_second) {
      ticks_per_second = sysconf(_SC_CLK_TCK);
   }
   struct tms timebuf;
   /* times returns the number of real time ticks passed since start */
   return (double) times(&timebuf) / ticks_per_second;
}

void init_vector(double* v, size_t len, ptrdiff_t incr) {
   for (size_t i = 0; i < len; ++i) {
      v[i*incr] = i + 1;
   }
}

void print_vector(double* v, size_t len, ptrdiff_t incr) {
   for (size_t i = 0; i < len; ++i) {
      printf(" %4.1lf", v[i*incr]);
   }
   printf("\n");
}

#define MAX_LEN 134217728
#define MAX_RUNS 1024

int main() {
   printf("     len t1 (separate) t2 (interleaving)      t2/t1\n");
   double* vector1[MAX_RUNS]; double* vector2[MAX_RUNS];
   size_t runs = MAX_RUNS;
   for (size_t len = 8192; len <= MAX_LEN/2; len *= 2) {
      printf("%8zu", len);

      for (size_t i = 0; i < runs; ++i) {
	 vector1[i] = malloc(sizeof(double) * len * 2);
	 vector2[i] = malloc(sizeof(double) * len * 2);
      }
      /* separate vectors */
      double t0 = walltime();
      for (size_t i = 0; i < runs; ++i) {
	 double* x = vector1[i];
	 double* y = vector1[i] + len;
	 init_vector(x, len, 1);
	 init_vector(y, len, 1);
      }
      double t1 = (walltime() - t0) / runs;
      printf(" %12.6lf", t1);

      /* interleaved vectors */
      t0 = walltime();
      for (size_t i = 0; i < runs; ++i) {
	 double* x = vector2[i];
	 double* y = vector2[i] + 1;
	 init_vector(x, len, 2);
	 init_vector(y, len, 2);
      }
      double t2 = (walltime() - t0) / runs;
      printf(" %12.6lf %16.6lf", t2, t2/t1);
      printf("\n");

      for (size_t i = 0; i < runs; ++i) {
	 free(vector1[i]); free(vector2[i]);
      }
      if (runs > 1) runs /= 2;
   }
}
theon$ gcc -Wall -o vector4 vector4.c
theon$ ./vector4
     len t1 (separate) t2 (interleaving)      t2/t1
    8192     0.000352     0.000693         1.972222
   16384     0.000137     0.000137         1.000000
   32768     0.000234     0.000234         1.000000
   65536     0.000469     0.000469         1.000000
  131072     0.000937     0.000938         1.000000
  262144     0.001562     0.001875         1.200000
  524288     0.003750     0.003750         1.000000
 1048576     0.007500     0.007500         1.000000
 2097152     0.015000     0.015000         1.000000
 4194304     0.025000     0.030000         1.200000
 8388608     0.060000     0.060000         1.000000
16777216     0.110000     0.690000         6.272727
33554432     0.290000     0.680000         2.344828
67108864     0.440000     1.380000         3.136364
theon$