#include <CL/cl.h>

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <fstream>
#include <iostream>
#include <string>
#include <vector>

/**
 * Reads the whole file `filename` into `s`.
 *
 * @param filename  Path of the file to read (opened in binary mode).
 * @param s         Receives the file contents; left untouched on failure.
 * @return 0 on success, 1 if the file could not be opened or read.
 */
int convertToString(const char *filename, std::string& s)
{
    std::ifstream f(filename, std::ios::in | std::ios::binary);
    if (!f.is_open()) {
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    // Determine the file size by seeking to the end, then rewind.
    f.seekg(0, std::ios::end);
    const std::streamoff fileSize = f.tellg();
    f.seekg(0, std::ios::beg);
    if (fileSize < 0 || !f) {
        printf("Error: Failed to read file %s\n", filename);
        return 1;
    }

    // Read straight into a std::string so there is no raw buffer to leak.
    std::string contents(static_cast<size_t>(fileSize), '\0');
    if (fileSize > 0) {
        f.read(&contents[0], static_cast<std::streamsize>(fileSize));
        if (!f) {
            printf("Error: Failed to read file %s\n", filename);
            return 1;
        }
    }

    s.swap(contents);
    return 0;
}

/**
 * Owns every OpenCL object created by main() and releases whatever was
 * created when it goes out of scope, so early error returns do not leak.
 */
struct ClResources {
    cl_context context;
    cl_command_queue queue;
    cl_program program;
    cl_kernel kernel;
    cl_float *d_a;
    cl_float *d_b;
    cl_float *d_c;

    ClResources()
        : context(NULL), queue(NULL), program(NULL), kernel(NULL),
          d_a(NULL), d_b(NULL), d_c(NULL) {}

    ~ClResources()
    {
        // Drain the queue first: clSVMFree does not wait for commands
        // (such as a pending unmap) that still reference the allocation.
        if (queue) clFinish(queue);
        if (context) {
            clSVMFree(context, d_a);  // a NULL pointer is a no-op
            clSVMFree(context, d_b);
            clSVMFree(context, d_c);
        }
        if (kernel) clReleaseKernel(kernel);
        if (program) clReleaseProgram(program);
        if (queue) clReleaseCommandQueue(queue);
        if (context) clReleaseContext(context);
    }

private:
    // Non-copyable: a copy would release the same handles twice.
    ClResources(const ClResources&);
    ClResources& operator=(const ClResources&);
};

/** Prints a diagnostic and returns true when `err` is not CL_SUCCESS. */
static bool clFailed(cl_int err, const char *what)
{
    if (err == CL_SUCCESS) {
        return false;
    }
    printf("Error: %s failed: %d\n", what, err);
    return true;
}

/** Maps the SVM buffer `dst` for writing, copies `bytes` bytes from `src`, unmaps it. */
static bool uploadToSvm(cl_command_queue queue, void *dst, const void *src, size_t bytes)
{
    if (clFailed(clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE, dst, bytes, 0, NULL, NULL),
                 "clEnqueueSVMMap")) {
        return false;
    }
    memcpy(dst, src, bytes);
    return !clFailed(clEnqueueSVMUnmap(queue, dst, 0, NULL, NULL), "clEnqueueSVMUnmap");
}

/** Maps the SVM buffer `src` for reading, copies `bytes` bytes into `dst`, unmaps it. */
static bool downloadFromSvm(cl_command_queue queue, void *dst, void *src, size_t bytes)
{
    if (clFailed(clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, src, bytes, 0, NULL, NULL),
                 "clEnqueueSVMMap")) {
        return false;
    }
    memcpy(dst, src, bytes);
    return !clFailed(clEnqueueSVMUnmap(queue, src, 0, NULL, NULL), "clEnqueueSVMUnmap");
}

/**
 * Adds two random float vectors with the "vecAdd" kernel loaded from
 * "mulmatrix.cl", using OpenCL 2.0 shared virtual memory (SVM) buffers,
 * and compares the result against a host-side reference sum.
 *
 * @return 0 when the run completes (whether or not the comparison passes),
 *         -1 when any OpenCL call or the kernel source load fails.
 */
int main( int argc, char* argv[] )
{
    (void)argc;
    (void)argv;

    // vector length
    const size_t n = 100;
    const size_t bytes = n * sizeof(float);

    // host input vectors, device result copy, and CPU reference result
    std::vector<float> h_a(n);
    std::vector<float> h_b(n);
    std::vector<float> h_c(n, 0.0f);
    std::vector<float> cpu_c(n);

    srand(static_cast<unsigned>(time(NULL)));
    for (size_t i = 0; i < n; i++) {
        h_a[i] = static_cast<float>(rand() % 50);
    }
    for (size_t i = 0; i < n; i++) {
        h_b[i] = static_cast<float>(rand() % 50);
    }

    // CPU reference computation
    for (size_t i = 0; i < n; i++) {
        cpu_c[i] = h_a[i] + h_b[i];
    }

    // Work-group size, and the global size rounded up to a multiple of it.
    // NOTE(review): if n is ever not a multiple of localSize the kernel must
    // ignore work-items with id >= n; the kernel source is not visible here.
    const size_t localSize = 2;
    const size_t globalSize = ((n + localSize - 1) / localSize) * localSize;

    cl_platform_id cpPlatform;  // OpenCL platform
    cl_device_id device_id;     // device ID
    cl_int err;

    if (clFailed(clGetPlatformIDs(1, &cpPlatform, NULL), "clGetPlatformIDs")) {
        return -1;
    }
    if (clFailed(clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1, &device_id, NULL),
                 "clGetDeviceIDs")) {
        return -1;
    }

    // SVM is an OpenCL 2.0 feature; refuse to continue on a device that does
    // not report at least coarse-grained buffer SVM (the map/unmap flavour
    // used below) instead of failing obscurely in clSVMAlloc.
    cl_device_svm_capabilities svmCaps = 0;
    err = clGetDeviceInfo(device_id, CL_DEVICE_SVM_CAPABILITIES,
                          sizeof(svmCaps), &svmCaps, NULL);
    if (err != CL_SUCCESS || (svmCaps & CL_DEVICE_SVM_COARSE_GRAIN_BUFFER) == 0) {
        printf("Error: device does not report OpenCL 2.0 coarse-grained buffer SVM support\n");
        return -1;
    }

    ClResources cl;

    cl.context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &err);
    if (clFailed(err, "clCreateContext")) {
        return -1;
    }
    cl.queue = clCreateCommandQueueWithProperties(cl.context, device_id, NULL, &err);
    if (clFailed(err, "clCreateCommandQueueWithProperties")) {
        return -1;
    }

    //=====================Create memory====================
    cl.d_a = static_cast<cl_float*>(clSVMAlloc(cl.context, CL_MEM_READ_WRITE, bytes, 0));
    cl.d_b = static_cast<cl_float*>(clSVMAlloc(cl.context, CL_MEM_READ_WRITE, bytes, 0));
    cl.d_c = static_cast<cl_float*>(clSVMAlloc(cl.context, CL_MEM_WRITE_ONLY, bytes, 0));
    if (!cl.d_a || !cl.d_b || !cl.d_c) {
        printf("Error: clSVMAlloc failed\n");
        return -1;
    }

    if (!uploadToSvm(cl.queue, cl.d_a, &h_a[0], bytes) ||
        !uploadToSvm(cl.queue, cl.d_b, &h_b[0], bytes)) {
        return -1;
    }

    //=====================Build program====================
    const char * filename = "mulmatrix.cl";
    std::string sourceStr;
    if (convertToString(filename, sourceStr) != 0) {
        return -1;
    }
    const char * source = sourceStr.c_str();
    const size_t sourceSize[] = { sourceStr.size() };

    cl.program = clCreateProgramWithSource(cl.context, 1, &source, sourceSize, &err);
    if (clFailed(err, "clCreateProgramWithSource")) {
        return -1;
    }

    err = clBuildProgram(cl.program, 1, &device_id, NULL, NULL, NULL);
    if (err != CL_SUCCESS) {
        printf("clBuild failed:%d\n", err);
        // Ask for the log size first rather than guessing a fixed buffer.
        size_t logSize = 0;
        clGetProgramBuildInfo(cl.program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);
        if (logSize > 0) {
            std::vector<char> log(logSize + 1, '\0');
            clGetProgramBuildInfo(cl.program, device_id, CL_PROGRAM_BUILD_LOG,
                                  logSize, &log[0], NULL);
            printf("\n%s\n", &log[0]);
        }
        return -1;
    }

    cl.kernel = clCreateKernel(cl.program, "vecAdd", &err);
    if (clFailed(err, "clCreateKernel")) {
        return -1;
    }

    //=====================Run kernel====================
    // SVM arguments take the SVM pointer itself, not the address of the host
    // variable that holds it.
    if (clFailed(clSetKernelArgSVMPointer(cl.kernel, 0, cl.d_a), "clSetKernelArgSVMPointer(0)") ||
        clFailed(clSetKernelArgSVMPointer(cl.kernel, 1, cl.d_b), "clSetKernelArgSVMPointer(1)") ||
        clFailed(clSetKernelArgSVMPointer(cl.kernel, 2, cl.d_c), "clSetKernelArgSVMPointer(2)")) {
        return -1;
    }

    // The element count is a plain scalar, so it goes through clSetKernelArg.
    // NOTE(review): assumes the kernel declares this parameter as a 32-bit
    // unsigned int; confirm against mulmatrix.cl.
    const cl_uint count = static_cast<cl_uint>(n);
    if (clFailed(clSetKernelArg(cl.kernel, 3, sizeof(count), &count), "clSetKernelArg(3)")) {
        return -1;
    }

    if (clFailed(clEnqueueNDRangeKernel(cl.queue, cl.kernel, 1, NULL,
                                        &globalSize, &localSize, 0, NULL, NULL),
                 "clEnqueueNDRangeKernel")) {
        return -1;
    }
    if (clFailed(clFinish(cl.queue), "clFinish")) {
        return -1;
    }

    //=====================Output====================
    // Read the results back from the device.
    if (!downloadFromSvm(cl.queue, &h_c[0], cl.d_c, bytes)) {
        return -1;
    }

    bool passed = true;
    for (size_t i = 0; i < n; i++) {
        printf("(%.5f,%.5f)\n", h_c[i], cpu_c[i]);
        if (fabs(cpu_c[i] - h_c[i]) > 0.0001) {
            printf("check failed\n");
            passed = false;
            break;
        }
    }
    if (passed) {
        printf("check passed\n");
    }

    // OpenCL objects are released by ~ClResources; host vectors free themselves.
    return 0;
}
Hi,
SVM is part of OpenCL 2.0. No existing driver or OpenCL runtime on the AMD platform supports OpenCL 2.0.