Sample solution

Content

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>

/* return real time in seconds since start of the process */
double walltime() {
static clock_t ticks_per_second = 0;
if (!ticks_per_second) {
ticks_per_second = sysconf(_SC_CLK_TCK);
}
struct tms timebuf;
/* times returns the number of real time ticks passed since start */
return (double) times(&timebuf) / ticks_per_second;
}

void init_vector(double* v, size_t len, ptrdiff_t incr) {
for (size_t i = 0; i < len; ++i) {
v[i*incr] = i + 1;
}
}

void print_vector(double* v, size_t len, ptrdiff_t incr) {
for (size_t i = 0; i < len; ++i) {
printf(" %4.1lf", v[i*incr]);
}
printf("\n");
}

#define MAX_LEN 134217728

int main() {
printf("     len t1 (separate) t2 (interleaving)      t2/t1\n");
for (size_t len = 8192; len <= MAX_LEN/2; len *= 2) {
printf("%8zu", len);

/* separate vectors */
double* vector1 = malloc(sizeof(double) * len * 2);
double* x = vector1;
double* y = vector1 + len;
double t0 = walltime();
init_vector(x, len, 1);
init_vector(y, len, 1);
double t1 = walltime() - t0;
printf(" %12.2lf", t1);

/* interleaved vectors */
double* vector2 = malloc(sizeof(double) * len * 2);
x = vector2; y = vector2 + 1;
t0 = walltime();
init_vector(x, len, 2);
init_vector(y, len, 2);
double t2 = walltime() - t0;
printf(" %12.2lf %16.2lf", t2, t2/t1);
printf("\n");

free(vector1); free(vector2);
}
}

Compilation and Execution

theon$gcc -Wall -o vector3 vector3.c theon$ ./vector3
len t1 (separate) t2 (interleaving)      t2/t1
8192         0.00         0.00             -NaN
16384         0.00         0.00             -NaN
32768         0.00         0.01              Inf
65536         0.00         0.00             -NaN
131072         0.00         0.01              Inf
262144         0.00         0.01              Inf
524288         0.01         0.01             1.00
1048576         0.01         0.03             3.00
2097152         0.02         0.05             2.50
4194304         0.04         0.11             2.75
8388608         0.06         0.17             2.83
16777216         0.11         0.35             3.18
33554432         0.23         0.75             3.26
67108864         0.44         5.78            13.14
theon$Limits of the granularity Very short time periods cannot be measured properly due to the insufficient granularity of the system clock: theon$ gcc -Wall -o ticks ticks.c
theon$./ticks ticks per second: 100 theon$

Hence, the precision of our timer is very limited. It is even worse as the system checks every 0.01 seconds what is actually running and accounts that tick accordingly. As we cannot control when the ticks occur, a computation that requires 0.001s might be hit by a tick while another computation taking 0.009s might complete between two ticks.

To achieve more precise measurements we need to execute computations repeatedly until some minimal amount of time has been spent.

Following variant tries to do this. It is important here that storage allocation and deallocation is done outside the loops of which the time is measured. And care should also be taken that each run gets its own storage as we otherwise run the test with possibly still cached memory sections.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/times.h>
#include <unistd.h>

/* return real time in seconds since start of the process */
double walltime() {
static clock_t ticks_per_second = 0;
if (!ticks_per_second) {
ticks_per_second = sysconf(_SC_CLK_TCK);
}
struct tms timebuf;
/* times returns the number of real time ticks passed since start */
return (double) times(&timebuf) / ticks_per_second;
}

void init_vector(double* v, size_t len, ptrdiff_t incr) {
for (size_t i = 0; i < len; ++i) {
v[i*incr] = i + 1;
}
}

void print_vector(double* v, size_t len, ptrdiff_t incr) {
for (size_t i = 0; i < len; ++i) {
printf(" %4.1lf", v[i*incr]);
}
printf("\n");
}

#define MAX_LEN 134217728
#define MAX_RUNS 1024

int main() {
printf("     len t1 (separate) t2 (interleaving)      t2/t1\n");
double* vector1[MAX_RUNS]; double* vector2[MAX_RUNS];
size_t runs = MAX_RUNS;
for (size_t len = 8192; len <= MAX_LEN/2; len *= 2) {
printf("%8zu", len);

for (size_t i = 0; i < runs; ++i) {
vector1[i] = malloc(sizeof(double) * len * 2);
vector2[i] = malloc(sizeof(double) * len * 2);
}
/* separate vectors */
double t0 = walltime();
for (size_t i = 0; i < runs; ++i) {
double* x = vector1[i];
double* y = vector1[i] + len;
init_vector(x, len, 1);
init_vector(y, len, 1);
}
double t1 = (walltime() - t0) / runs;
printf(" %12.6lf", t1);

/* interleaved vectors */
t0 = walltime();
for (size_t i = 0; i < runs; ++i) {
double* x = vector2[i];
double* y = vector2[i] + 1;
init_vector(x, len, 2);
init_vector(y, len, 2);
}
double t2 = (walltime() - t0) / runs;
printf(" %12.6lf %16.6lf", t2, t2/t1);
printf("\n");

for (size_t i = 0; i < runs; ++i) {
free(vector1[i]); free(vector2[i]);
}
if (runs > 1) runs /= 2;
}
}
theon$gcc -Wall -o vector4 vector4.c theon$ ./vector4
len t1 (separate) t2 (interleaving)      t2/t1
8192     0.000352     0.000693         1.972222
16384     0.000137     0.000137         1.000000
32768     0.000234     0.000234         1.000000
65536     0.000469     0.000469         1.000000
131072     0.000937     0.000938         1.000000
262144     0.001562     0.001875         1.200000
524288     0.003750     0.003750         1.000000
1048576     0.007500     0.007500         1.000000
2097152     0.015000     0.015000         1.000000
4194304     0.025000     0.030000         1.200000
8388608     0.060000     0.060000         1.000000
16777216     0.110000     0.690000         6.272727
33554432     0.290000     0.680000         2.344828
67108864     0.440000     1.380000         3.136364
theon\$