// NOTE(review): the backslash separator only resolves on Windows toolchains;
// <CL/cl.h> is the portable spelling.
#include <CL\cl.h>

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <fstream>
#include <iostream>
#include <string>


// Reads the whole file `filename` (opened in binary mode) into `s`.
//
// Parameters:
//   filename - path of the file to read; must not be null.
//   s        - receives the file contents byte for byte. It is only modified
//              when the function succeeds.
//
// Returns 0 on success and 1 on any failure (null path, file cannot be
// opened, size cannot be determined, or the read comes up short). A
// diagnostic is printed to stdout on failure.
int convertToString(const char *filename, std::string& s)
{
    if (filename == NULL)
    {
        printf("Error: Failed to open file (null file name)\n");
        return 1;
    }

    std::fstream f(filename, (std::fstream::in | std::fstream::binary));
    if (!f.is_open())
    {
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    // Measure the file by seeking to its end. tellg() reports -1 on failure,
    // which must not be turned into a huge unsigned allocation size.
    f.seekg(0, std::fstream::end);
    const std::streamoff length = f.tellg();
    f.seekg(0, std::fstream::beg);
    if (length < 0 || !f)
    {
        printf("Error: Failed to determine size of file %s\n", filename);
        return 1;
    }

    // Read straight into a string so no manually managed buffer is needed.
    std::string contents(static_cast<size_t>(length), '\0');
    if (length > 0)
    {
        f.read(&contents[0], static_cast<std::streamsize>(length));
        if (f.gcount() != static_cast<std::streamsize>(length))
        {
            printf("Error: Failed to read file %s\n", filename);
            return 1;
        }
    }

    s.swap(contents);
    return 0;
}
// Host program for the "vecAdd" OpenCL kernel, which is loaded at run time
// from mulmatrix.cl. Each work item is expected to take care of one element of c.

int main( int argc, char* argv[] )
{
// vector length
long int n = 100;

// vector
float *h_a;
float *h_b;
// vector
float *h_c;
float *cpu_c;

// device input
cl_float *d_a;
cl_float *d_b;
cl_float *d_c;
/*
cl_mem d_a;
cl_mem d_b;
cl_mem d_c;*/


cl_platform_id cpPlatform; // OpenCL
cl_device_id device_id; // device ID
cl_context context; // context
cl_command_queue queue; // command queue
cl_kernel kernel; // kernel

//
size_t bytes = n*sizeof(float);


h_a = (float*)malloc(bytes);
h_b = (float*)malloc(bytes);
h_c = (float*)malloc(bytes);
cpu_c= (float*)malloc(bytes);

int i;
srand( (unsigned)time( NULL ) );
for(i = 0; i < n; i++)
h_a = rand()%50;

srand( (unsigned)time( NULL ) +1000);
for(i = 0; i < n; i++)
h_b = rand()%50;

//cpu computer
for( i=0; i < n; i++)
{
cpu_c = h_a+h_b;
}

size_t globalSize, localSize;
cl_int err;

//
localSize = 2;

//
globalSize = (size_t)ceil(n/(float)localSize)*localSize;


err = clGetPlatformIDs(1, &cpPlatform, NULL);

err = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1, &device_id, NULL);


context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);


queue = clCreateCommandQueue(context, device_id, 0, &err);

//=====================Create memory====================

d_a=(cl_float*)clSVMAlloc(context,CL_MEM_READ_WRIT E,bytes,0);
d_b=(cl_float*)clSVMAlloc(context,CL_MEM_READ_WRIT E,bytes,0);
d_c=(cl_float*)clSVMAlloc(context,CL_MEM_WRITE_ONL Y,bytes,0);

clEnqueueSVMMap(queue,CL_TRUE,CL_MAP_WRITE,d_a,byt es,0,0,0);
clEnqueueSVMMap(queue,CL_TRUE,CL_MAP_WRITE,d_b,byt es,0,0,0);

memcpy(d_a,h_a,bytes);
memcpy(d_b,h_b,bytes);

clEnqueueSVMUnmap(queue,d_a,0,0,0);
clEnqueueSVMUnmap(queue,d_a,0,0,0);

const char * filename = "mulmatrix.cl";
std::string sourceStr;
err = convertToString(filename, sourceStr);
const char * source = sourceStr.c_str();
size_t sourceSize[] = { strlen(source) };


cl_program program = clCreateProgramWithSource(
context,
1,
&source,
sourceSize,
NULL);

err = clBuildProgram( program, 1, &device_id, NULL, NULL, NULL );
if(err != 0)
{
printf("clBuild failed:%d\n", err);
char tbuf[0x10000];
clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0x10000, tbuf, NULL);
printf("\n%s\n", tbuf);
return -1;
}

kernel = clCreateKernel( program, "vecAdd", NULL );

clBuildProgram(program, 0, NULL, NULL, NULL, NULL);


kernel = clCreateKernel(program, "vecAdd", &err);

err = clSetKernelArgSVMPointer(kernel, 0, &d_a);
err = clSetKernelArgSVMPointer(kernel, 1, &d_b);
err = clSetKernelArgSVMPointer(kernel, 2, &d_c);
err = clSetKernelArgSVMPointer(kernel, 3, &n);

err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize,0, NULL, NULL);
clFinish(queue);

// Read the results from the device
//==============================================outp ut===============================

clEnqueueSVMMap(queue,CL_TRUE,CL_MAP_READ,d_c,byte s,0,0,0);
memcpy(h_c,d_c,bytes);
clEnqueueSVMUnmap(queue,d_c,0,0,0);

float sum = 0;

for(i = 0; i < n; i++)
{
printf("(%.5f,%.5f)\n",h_c,cpu_c);
if(abs(cpu_c - h_c) > 0.0001)
{
printf("check failed\n");
break;
}

}
if(i ==n)
printf("check passed\n");

clSVMFree(context,d_a);
clSVMFree(context,d_b);
clSVMFree(context,d_c);

clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(queue);
clReleaseContext(context);


free(h_a);
free(h_b);
free(h_c);
return 0;
}