I want to use two GPU to realize a matrix caculation, and therefore I create two cmd queue for two devices.
Additionally,clBuildProgram() return CL_SUCCESS.The abstract of my code as follows:
===============================================================================================
//get platform
status = clGetPlatformIDs(0, NULL, &numPlatforms);
if(status != CL_SUCCESS)
{
printf("Error: Getting Platforms.\(clGetPlatformsIDs)\n");
return EXIT_FAILURE;
}
cl_platform_id *platforms = NULL;
if(numPlatforms > 0)
{
//Allocate enough space for each platform
platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id));
//fill in the platforms
status = clGetPlatformIDs(numPlatforms, platforms, NULL);
if(status != CL_SUCCESS)
{
printf("Error: Getting Platform Ids.\(clGetPlatformsIDs)\n");
return -1;
}
}
cl_uint numDevices = 0;
cl_device_id *devices = NULL;
status = clGetDeviceIDs(
platforms[0],
CL_DEVICE_TYPE_GPU,
0,
NULL,
&numDevices);
// Allocate enough space for each device
devices = (cl_device_id*)malloc(
numDevices*sizeof(cl_device_id));
// Fill in devices with clGetDeviceIDs()
status = clGetDeviceIDs(
platforms[0],
CL_DEVICE_TYPE_GPU,
numDevices,
devices,
NULL);
char devicename[128];
//print the devices' names.
for(int i=0;i<numDevices;i++){
status=clGetDeviceInfo(devices,CL_DEVICE_NAME,128*sizeof(char),devicename,NULL);
if (status != CL_SUCCESS) {
printf("Error: Getting devices Info (device name, clGetDeviceInfo)\n");
}
printf("device[%d] name:\t%s\n",i,devicename);
}
cl_context context=NULL;
context = clCreateContext(
NULL,
numDevices,
devices,
NULL,
NULL,
&status);
const char* source = readSource("matrix.cl");
cl_program program = clCreateProgramWithSource(context,1,&source,NULL,&status);
if(status != CL_SUCCESS) |
{
printf("Error: Loading Binary into cl_program \(clCreateProgramWithBinary)\n"); | |
return EXIT_FAILURE; |
}
string buildflag="-cl-std=CL2.0";
status = clBuildProgram(program, 1, devices, buildflag.c_str(), NULL, NULL); // It returns CL_SUCCESS when debug
if(status != CL_SUCCESS)
{
printf("Error: Building Program \(clBuildProgram)\n"); | |
printf("%d",status); | |
char buildLog[16384]; | |
clGetProgramBuildInfo(program,*devices,CL_PROGRAM_BUILD_LOG,sizeof(buildLog),buildLog,NULL); |
cerr<<"in kernel"<<endl; | |
cerr<<buildLog; | |
clReleaseProgram(program); | |
return EXIT_FAILURE; |
}
//kernels of device[0]
cl_kernel kernel = clCreateKernel(program, "hellocl", &status); |
cl_kernel kernelend =clCreateKernel(program, "helloclend", &status);
cl_kernel kernelagain =clCreateKernel(program, "helloclagain", &status);
//kernels of device[1]
cl_kernel kernel1 = clCreateKernel(program, "hellocl1", &status);
cl_kernel kernelend1 =clCreateKernel(program, "helloclend1", &status);
cl_kernel kernelagain1 =clCreateKernel(program, "helloclagain1", &status);
if(status != CL_SUCCESS) |
{
printf("Error: Creating Kernel from program.\(clCreateKernel)\n"); | |
return EXIT_FAILURE; |
}
cl_command_queue commandQueue = clCreateCommandQueue(context, devices[0], 0, &status); //cmd queue for device[0]
cl_command_queue commandQueue1 = clCreateCommandQueue(context, devices[1], 0, &status); //cmd queue for device[1]
if(status != CL_SUCCESS)
{
printf("Creating Command Queue.\(clCreatCommandQueue)\n");
return EXIT_FAILURE;
}
cl_command_queue_properties devProp[] = {CL_QUEUE_PROPERTIES,
CL_QUEUE_ON_DEVICE|
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|
CL_QUEUE_ON_DEVICE_DEFAULT,
CL_QUEUE_SIZE,
524288,
0};
cl_command_queue devCommandQueue; //device-side queue for device[0]
cl_command_queue devCommandQueue1; //device-side queue for device[1]
devCommandQueue = clCreateCommandQueueWithProperties(
context,
devices[0],
devProp,
&status);
devCommandQueue = clCreateCommandQueueWithProperties(
context,
devices[1],
devProp,
&status);
if(status != CL_SUCCESS)
{
printf("Creating device-Command Queue.\(clCreatCommandQueuewithproperties)\n");
return EXIT_FAILURE;
}
// PASS score-matrix in "outbuffer" in CPU to "outputBuffer" in GPU;commandQueue
status = clEnqueueWriteBuffer(commandQueue,outputBuffer,CL_TRUE,0,
(strlen(strA)+1) *(strlen(strB)+1) * 4,outbuffer,0, NULL, NULL);
status = clEnqueueWriteBuffer(commandQueue1,outputBuffer,CL_TRUE,0,
(strlen(strA)+1) *(strlen(strB)+1) * 4,outbuffer,0, NULL, NULL);
// pass strA in CPU to bufferA in GPU
status = clEnqueueWriteBuffer(commandQueue,bufferA,CL_TRUE,0,
(strlen(strA)+1) * sizeof(char),strA,0, NULL, NULL);
status = clEnqueueWriteBuffer(commandQueue1,bufferA,CL_TRUE,0,
(strlen(strA)+1) * sizeof(char),strA,0, NULL, NULL);
// pass strB in CPU to bufferB in GPU
status = clEnqueueWriteBuffer(commandQueue,bufferB,CL_TRUE,0,
(strlen(strB)+1) * sizeof(char),strB,0, NULL, NULL);
status = clEnqueueWriteBuffer(commandQueue1,bufferB,CL_TRUE,0,
(strlen(strB)+1) * sizeof(char),strB,0, NULL, NULL);
//set arguments of every kernels...
size_t globalThreads[] = {(strlen(strB)+1)/2}; // strlen(strB)+1=2246
size_t globalThreads1[] = {((strlen(strB)+1)-(strlen(strB)+1)/2)};
if(((strlen(strB)+1)/2)<256)
{
size_t localThreads[] = {(strlen(strB)+1)/2};
status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, globalThreads,localThreads, 0, NULL,NULL);
if(status != CL_SUCCESS)
{
printf("Error: Enqueueing kernel\n");
return EXIT_FAILURE;
}
}
else
{
size_t localThreads[] = {256};
// clEnqueueNDRangeKernel: put "kernel" into "commandQueue"
status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, globalThreads,localThreads, 0, NULL,NULL); //This function returns CL_SUCCESS
if(status != CL_SUCCESS)
{
printf("Error: Enqueueing kernel\n");
return EXIT_FAILURE;
}
}
if(((strlen(strB)+1)-(strlen(strB)+1)/2)<256)
{
size_t localThreads[] = {((strlen(strB)+1)-(strlen(strB)+1)/2)};
// clEnqueueNDRangeKernel: put "kernel" into "commandQueue"
status = clEnqueueNDRangeKernel(commandQueue1, kernel1, 1, NULL, globalThreads1,localThreads, 0, NULL,NULL);
if(status != CL_SUCCESS)
{
printf("Error: Enqueueing kernel\n");
return EXIT_FAILURE;
}
}
else
{
size_t localThreads[] = {256};
// clEnqueueNDRangeKernel: put "kernel" into "commandQueue"
status = clEnqueueNDRangeKernel(commandQueue1, kernel1, 1, NULL, globalThreads1,localThreads, 0, NULL,NULL); // This function returns CL_INVALID_PROGRAM_EXECUTABLE
if(status != CL_SUCCESS)
{
printf("Error: Enqueueing kernel\n");
return EXIT_FAILURE;
}
}
===================================================================================================================================
I can attach my program file if nessesary.
Please help.
Solved! Go to Solution.
status = clBuildProgram(program, 1, devices, buildflag.c_str(), NULL, NULL); // It returns CL_SUCCESS when debug
You're building the program only for one device i.e. 1st one. You need to build the program for both the devices.
Regards,
status = clBuildProgram(program, 1, devices, buildflag.c_str(), NULL, NULL); // It returns CL_SUCCESS when debug
You're building the program only for one device i.e. 1st one. You need to build the program for both the devices.
Regards,
Thanks for your help.