cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

ankurdh
Journeyman III

Profiler doubt

Hello,

I've written a small particle system demo. When I run the program from VS2008, there is a small but noticeable delay while the particles are moving. But when I run it under the OpenCL Profiler tool, there is a dramatic improvement!

Can anyone tell why this is happening?

0 Likes
3 Replies
omkaranathan
Adept I

Ankurdh,

Could you post the source code?

0 Likes

I have attached the code. And Omkar, can you please help me with one more thing?

When I run the code with the device type set to CPU, it runs fine. But when I change the device type to GPU, the output gets clipped beyond two parallel lines. Can you please run this code on your machine and help me figure out what the problem is?

Thanks. 

/* The .hpp header file. */
#ifndef PARTICLE_SYSTEMS_HPP
#define PARTICLE_SYSTEMS_HPP

#include<GL/glut.h>
#include<cstdlib>
#include<cstdio>
#include<iostream>

// Ask cl.hpp to use its own cl::vector / cl::string instead of the std ones.
#define __NO_STD_VECTOR
#define __NO_STD_STRING
#include<SDKUtil/SDKFile.hpp>
#include <SDKUtil/SDKCommon.hpp>
#include<CL/cl.hpp>

using namespace cl;

// Side length of the square particle grid (MAX x MAX particles in total).
#define MAX_NO_OF_PARTICLES 500

/*
 * Owns every OpenCL object the demo needs: platform list, context, device
 * list, command queue, program and kernel. All of them are created once in
 * the constructor and then used by setParticlePositions().
 */
class OpenCLReference{
private:
    streamsdk::SDKCommon * toolkit;         // SDK helper used for error reporting (checkVal)
    vector<Platform> platforms;
    cl_context_properties cntxtProps[3];    // { CL_CONTEXT_PLATFORM, <platform id>, 0 }
    Context * context;
    vector<Device> devices;
    CommandQueue * cmdQueue;
    streamsdk::SDKFile kernelFile;          // holds the kernel source text
    Program * program;
    Program::Sources * kernelSource;
    Kernel * kernel;

public:
    OpenCLReference();
    friend void setParticlePositions();
};

/*
 * Host-side particle positions, stored as two row-major 2D grids
 * (one for the X coordinate and one for the Y coordinate of each particle).
 */
class ParticleSystem {
private:
    GLfloat particlesXValues[MAX_NO_OF_PARTICLES][MAX_NO_OF_PARTICLES];
    GLfloat particlesYValues[MAX_NO_OF_PARTICLES][MAX_NO_OF_PARTICLES];

public:
    void initializeParticles(int,int);
    friend void setParticlePositions();
    friend void display();
    friend class OpenCLReference;
};

void setParticlePositions();
void display();

#endif /* PARTICLE_SYSTEMS_HPP */
---------------------------------------------------------------------------------------------------------------------------------------
/* The main.cpp file. */
---------------------------------------------------------------------------------------------------------------------------------------
#include "particleSystems.hpp"

OpenCLReference * openCLReference;
ParticleSystem * particleSystem;

// Not read anywhere in this file.
float red = 0.02f, green = 0.04f, blue = 0.3f;
double x = 5;

/*
 * Creates the OpenCL context, command queue, program and kernel.
 * The kernel source is read from "particleSystemKernelFile.txt" and the
 * kernel entry point is "particleSystemKernel".
 */
OpenCLReference::OpenCLReference(){
    toolkit = new streamsdk::SDKCommon();

    cl_int err = Platform::get(&platforms);
    toolkit->checkVal(err,CL_SUCCESS,"Platforms could not be queried");

    // Dereferencing begin() of an empty platform list would be undefined,
    // so bail out the same way as for a missing kernel file.
    if(err != CL_SUCCESS || platforms.size() == 0){
        std::cout<<"No OpenCL platform available."<<std::endl;
        std::getchar();
        std::exit(-1);
    }

    cntxtProps[0] = CL_CONTEXT_PLATFORM;
    cntxtProps[1] = (cl_context_properties)(*platforms.begin())();
    cntxtProps[2] = 0;

    context = new Context(CL_DEVICE_TYPE_GPU,cntxtProps,NULL,NULL,&err);
    toolkit->checkVal(err,CL_SUCCESS,"Context could not be created.");

    devices = context->getInfo<CL_CONTEXT_DEVICES>(&err);
    toolkit->checkVal(err,CL_SUCCESS,"Devices could not be queried.");

    // Queue properties are a bit-field, so "none" is 0 rather than NULL.
    cmdQueue = new CommandQueue(*context,*devices.begin(),0,&err);
    toolkit->checkVal(err,CL_SUCCESS,"Command queue could not be created");

    if(!kernelFile.open("particleSystemKernelFile.txt")){
        std::cout<<"Could not open kernel file."<<std::endl;
        std::getchar();
        std::exit(-1);
    }

    kernelSource = new Program::Sources(1,std::make_pair(kernelFile.source().data(),kernelFile.source().size()));

    program = new Program(*context,*kernelSource,&err);
    toolkit->checkVal(err,CL_SUCCESS,"Program could not be created.");

    err = program->build(devices,NULL,NULL,NULL);
    toolkit->checkVal(err,CL_SUCCESS,"Program was not allocated the devices.");

    kernel = new Kernel(*program,"particleSystemKernel",&err);
    toolkit->checkVal(err,CL_SUCCESS,"Kernel could not be created.");
}

/*
 * Lays the particles out on a regular unit-spaced grid whose first particle
 * sits at (x, y): the particle in row i, column j is placed at (x + j, y + i).
 */
void ParticleSystem::initializeParticles(int x, int y){
    for(int i = 0 ; i < MAX_NO_OF_PARTICLES ; i ++){
        for(int j = 0 ; j < MAX_NO_OF_PARTICLES ; j ++){
            particlesXValues[i][j] = static_cast<GLfloat>(x + j);
            particlesYValues[i][j] = static_cast<GLfloat>(y + i);
        }
    }
}

/*
 * GLUT reshape callback: resets the viewport to the new window size and
 * re-establishes the fixed perspective projection and camera.
 */
void reshape(int width, int height)
{
    glViewport(0, 0, width, height);

    glMatrixMode(GL_PROJECTION);
    glLoadIdentity();
    glFrustum(-1.0f, 1.0f, -1.0, 1.0, 2.0, 2000.0);
    //glOrtho(-500,500,-500,500,-100,100);

    glMatrixMode(GL_MODELVIEW);
    glLoadIdentity() ;
    // Camera at z = -1900, looking at the point (250, 250, 0).
    gluLookAt(250.0f,250.0f,-1900.0f,250.0f,250.0f,0.0f,0.0f,1.0f,0.0f);
}

/*
 * GLUT display callback: draws every particle as a single point.
 */
void display(void)
{
    glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
    glColor3f(0.8f,1.0f,0.0f);

    glBegin(GL_POINTS);
    for(int i = 0 ; i < MAX_NO_OF_PARTICLES ; i ++)
        for(int j = 0 ; j < MAX_NO_OF_PARTICLES ; j ++)
            glVertex2f(particleSystem->particlesXValues[i][j],
                       particleSystem->particlesYValues[i][j]);
    glEnd();

    glFlush();
    glutSwapBuffers();
}

/*
 * GLUT idle callback: runs one simulation step on the OpenCL device.
 *
 * Uploads both coordinate grids, runs the kernel over a
 * MAX_NO_OF_PARTICLES x MAX_NO_OF_PARTICLES range, reads BOTH grids back
 * and requests a redraw.
 */
void setParticlePositions(){
    cl_int err;
    const std::size_t gridBytes =
        sizeof(GLfloat)*MAX_NO_OF_PARTICLES*MAX_NO_OF_PARTICLES;

    Buffer particlesXBuffer(*(openCLReference->context),
        CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
        gridBytes,
        (void *)(particleSystem->particlesXValues),&err);
    openCLReference->toolkit->checkVal(err,CL_SUCCESS,"Buffer 1 could not be allocated");

    Buffer particlesYBuffer(*(openCLReference->context),
        CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
        gridBytes,
        (void *)(particleSystem->particlesYValues),&err);
    openCLReference->toolkit->checkVal(err,CL_SUCCESS,"Buffer 2 could not be allocated");

    // Blocking uploads; the offset is a byte count, so it is 0 rather than NULL.
    err = openCLReference->cmdQueue->enqueueWriteBuffer(particlesXBuffer,CL_TRUE,0,
        gridBytes,(void *)particleSystem->particlesXValues);
    openCLReference->toolkit->checkVal(err,CL_SUCCESS,"Buffer 1 could not be copied");

    err = openCLReference->cmdQueue->enqueueWriteBuffer(particlesYBuffer,CL_TRUE,0,
        gridBytes,(void *)particleSystem->particlesYValues);
    openCLReference->toolkit->checkVal(err,CL_SUCCESS,"Buffer 2 could not be copied");

    // The three scalars are passed by value. Their host types must match the
    // kernel's parameter types exactly (float, float, int).
    const cl_float centerX = MAX_NO_OF_PARTICLES / 2.0f;
    const cl_float centerY = MAX_NO_OF_PARTICLES / 2.0f;
    const cl_int maxNoOfParticles = MAX_NO_OF_PARTICLES;

    err = openCLReference->kernel->setArg(0,particlesXBuffer);
    openCLReference->toolkit->checkVal(err,CL_SUCCESS,"Arg 1 not set");

    err = openCLReference->kernel->setArg(1,particlesYBuffer);
    openCLReference->toolkit->checkVal(err,CL_SUCCESS,"Arg 2 not set");

    err = openCLReference->kernel->setArg(2,centerX);
    openCLReference->toolkit->checkVal(err,CL_SUCCESS,"Arg 3 not set");

    err = openCLReference->kernel->setArg(3,centerY);
    openCLReference->toolkit->checkVal(err,CL_SUCCESS,"Arg 4 not set");

    err = openCLReference->kernel->setArg(4,maxNoOfParticles);
    openCLReference->toolkit->checkVal(err,CL_SUCCESS,"Arg 5 not set");

    err = openCLReference->cmdQueue->enqueueNDRangeKernel(*(openCLReference->kernel),NullRange,
        NDRange(MAX_NO_OF_PARTICLES,MAX_NO_OF_PARTICLES),NullRange,NULL,NULL);
    openCLReference->toolkit->checkVal(err,CL_SUCCESS,"Kernels could not be run");

    // The kernel modifies both coordinate arrays, so both must be read back.
    // Skipping Y leaves the host's Y values stale on any device that does not
    // operate directly on the host memory.
    err = openCLReference->cmdQueue->enqueueReadBuffer(particlesXBuffer,CL_TRUE,0,
        gridBytes,(void *)particleSystem->particlesXValues,
        NULL,NULL);
    openCLReference->toolkit->checkVal(err,CL_SUCCESS,"Values could not be read from the kernel.");

    err = openCLReference->cmdQueue->enqueueReadBuffer(particlesYBuffer,CL_TRUE,0,
        gridBytes,(void *)particleSystem->particlesYValues,
        NULL,NULL);
    openCLReference->toolkit->checkVal(err,CL_SUCCESS,"Y values could not be read from the kernel.");

    glutPostRedisplay();
}

/*
 * GLUT mouse callback: a left-button event re-creates the particle grid with
 * its first particle at the click position. Window y grows downwards, so it
 * is flipped using the current window height.
 */
void mouseClick(int btn, int state, int x, int y){
    if(btn == GLUT_LEFT_BUTTON){
        particleSystem->initializeParticles(x, glutGet(GLUT_WINDOW_HEIGHT)-y);
    }
    glutPostRedisplay();
}

int main(int argc, char ** argv){
    openCLReference = new OpenCLReference();
    particleSystem = new ParticleSystem();
    particleSystem->initializeParticles(0,0);

    glutInit(&argc, argv);
    glutInitWindowSize(640,480);
    glutInitWindowPosition(10,10);
    glutInitDisplayMode(GLUT_RGB | GLUT_DOUBLE | GLUT_DEPTH);
    glutCreateWindow("Particle System");

    glClearColor(0.0f,0.0f,0.0f,0.0f);

    glutMouseFunc(mouseClick);
    glutReshapeFunc(reshape);
    glutDisplayFunc(display);
    glutIdleFunc(setParticlePositions);

    glutMainLoop();
    return 0;
}
-------------------------------------------------------------------------------------------------------------------------------------
/* Kernel file. */
-------------------------------------------------------------------------------------------------------------------------------------
/*
 * One work-item per particle. global id 0 is the column (x) and global id 1
 * is the row (y) of the particle in the row-major position grids.
 *
 * Each particle is moved by 0.1 * sin(index) times its offset from the
 * centre. Every work-item reads and writes only its own element, so there
 * are no cross-work-item data races.
 */
__kernel void particleSystemKernel(
    __global float * particlesXValues,
    __global float * particlesYValues,
    const float centerX,
    const float centerY,
    const int maxNoOfParticles)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    // Same flat index for the read and the write, and for both arrays.
    const int idx = y * maxNoOfParticles + x;

    // Kept in float so that sub-unit displacements are not truncated away.
    const float dx = centerX - particlesXValues[idx];
    const float dy = centerY - particlesYValues[idx];

    // Explicit float argument selects the float overload of sin().
    const float step = sin((float)idx) * 0.1f;

    particlesXValues[idx] += dx * step;
    particlesYValues[idx] += dy * step;
}

0 Likes

You are hitting this limitation of the profiler due to the usage of CL_MEM_READ_WRITE flag for your buffers.

This issue has been fixed internally.

0 Likes