#include #include #include #include #include using namespace std; // y = y + alpha * x void axpy(int n, double alpha, const double* x, int incX, double* y, int incY) { for (int i = 0; i < n; ++i, x += incX, y += incY) { *y += alpha * *x; } } struct axpy_parameters { int n; double alpha; double* x; int incX; double* y; int incY; }; extern "C" void* run_axpy_thread(void* arg) { struct axpy_parameters* param = (struct axpy_parameters*) arg; axpy(param->n, param->alpha, param->x, param->incX, param->y, param->incY); return arg; } void mt_axpy(int n, double alpha, double* x, int incX, double* y, int incY, int nofthreads) { assert(n > 0 && nofthreads > 0); if (nofthreads > n) nofthreads = n; struct axpy_parameters* params = new axpy_parameters[nofthreads]; pthread_t* thread = new pthread_t[nofthreads]; // spawn threads and pass parameters (cache-unfriendly version) int chunk = n / nofthreads; int remainder = n % nofthreads; int nextX = 0; int nextY = 0; for (int i = 0; i < nofthreads; ++i) { int len = chunk; if (i < remainder) ++len; params[i].n = len; params[i].alpha = alpha; params[i].x = x + i * incX; params[i].incX = incX * nofthreads; params[i].y = y + i * incY; params[i].incY = incY * nofthreads; if (pthread_create(&thread[i], 0, run_axpy_thread, ¶ms[i])) { cerr << "unable to create threads, aborting" << endl; exit(1); } nextX += len; nextY += len; } // wait for all threads to finish for (int i = 0; i < nofthreads; ++i) { struct axpy_parameters* rp; if (pthread_join(thread[i], (void**) &rp)) { cerr << "pthread_join failed, aborting" << endl; exit(1); } } // cleanup delete[] params; delete[] thread; } char* cmdname; void usage() { cerr << "Usage: " << cmdname << " [len [# threads]]" << endl; exit(1); } void fill_vector(double* a, int len) { static double val = 0; while (len > 0) { *a++ = val; val = val + 1; --len; } } int main(int argc, char** argv) { const double a = 0; const double b = 1; int N = 10000; int nofthreads = 10; cmdname = *argv++; --argc; if (argc > 0) { istringstream arg(*argv++); --argc; if (!(arg >> N) || N <= 0) usage(); } if (argc > 0) { istringstream arg(*argv++); --argc; if (!(arg >> nofthreads) || nofthreads <= 0) usage(); } if (argc > 0) usage(); double* x = new double[N]; fill_vector(x, N); double* y = new double[N]; fill_vector(y, N); double alpha = 2; // axpy(N, alpha, x, 1, y, 1); mt_axpy(N, alpha, x, 1, y, 1, nofthreads); #ifdef OUTPUT for (int i = 0; i < N; ++i) { if (i % 8 == 0) { cout << endl; } else { cout << " "; } cout << y[i]; } cout << endl; #endif }