hello!
last term we developed a "game of life" simulation in one of our lectures ("efficient programming with c++")
with a 3ghz athlon64 x2 (4gb memory) i get around 52,5 seconds (single threaded) for a 1000x1000 board with 1000 generations (random distribution of cells)
with a 2,6ghz core i5 (4gb memory) i get ~16,5 seconds (single threaded) ...
compiler is msvc++ 2008, c/c++ optimization Maximize Speed (/O2)
for code see attachment... GoL is implemented in terms of 2 1-dimensional bool arrays which are swapped every generation (pointer swap)
the question is: how can this massive difference be explained ... and are there any tweaks to get msvc++ to produce faster code for an amd?
#include <omp.h> #include "GoLAlgo.h" GoLAlgo::GoLAlgo(GoLBoard *_gamefield, unsigned int _generations) : gamefield(_gamefield), dimension(_gamefield->getDimension()), generations(_generations) { gamefield2 = GoLBoard::createBoard(dimension); //std::cout << "created new algo" << std::endl; } GoLAlgo::~GoLAlgo(void) { if( gamefield2 != NULL) { delete gamefield2; } //std::cout << "deleted algo" << std::endl; } Measurement * GoLAlgo::compute(bool b_measure) { int dimx = dimension.x; int dimy = dimension.y; // preparation: copy boards to local variables (-> saves pointer access) GoLBoard gf1(dimension); gf1.copyFrom(gamefield); GoLBoard gf2(*gamefield); if(b_measure) { this->computation = new Measurement(); this->computation->start(); } for( unsigned int g = 0; g < generations; ++g ) { // parallel execution starts here //#pragma omp parallel for for(int y=0; y<dimy; ++y) { for(int x=0; x<dimx; ++x) { // check for alive neighbours char n = gf1.field[(((y-1+dimy)%dimy)*dimx) + ((x-1+dimx)%dimx)]; // links oben n += gf1.field[(((y-1+dimy)%dimy)*dimx) + (x%dimx)]; // mitte oben n += gf1.field[(((y-1+dimy)%dimy)*dimx) + ((x+1)%dimx)]; // rechts oben n += gf1.field[(y*dimx) + ((x-1+dimx)%dimx)]; // links mitte n += gf1.field[(y*dimx) + ((x+1)%dimx)]; // rechts mitte n += gf1.field[(((y+1)%dimy)*dimx) + ((x-1+dimx)%dimx)]; // links unten n += gf1.field[(((y+1)%dimy)*dimx) + (x%dimx)]; // mitte unten n += gf1.field[(((y+1)%dimy)*dimx) + ((x+1)%dimx)]; // rechts unten int curpos = (y*dimx)+x; if( gf1.field[curpos] ) { // cell at curpos alive if( n < 2 || n > 3 ) { gf2.field[curpos] = false; } else { // 2 or 3 gf2.field[curpos] = true; } } else { // cell at curpos dead if( n == 3 ) { // cell -> alive gf2.field[curpos] = true; } else { // cell stays dead gf2.field[curpos] = false; } } } } // switch gamefields (internal pointer swap) gf1.switchField(gf2); } if( b_measure ) { this->computation->end(); } // dont forget to copy back the right board if(generations %2 == 0) { gamefield->copyFrom(&gf2); } else { gamefield->copyFrom(&gf1); } if( b_measure ) { return computation; } return NULL; } GoLAlgo *GoLAlgo::createAlgorithm(GoLBoard *_board, unsigned int _generations) { return new GoLAlgo(_board, _generations); }