cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

shunyo
Journeyman III

Finding minimum of square of difference between two arrays

Hi,

I have been trying to execute a simple kernel, but it returns garbage values and I am unable to figure out why. I want to find the closest set of planes to a given plane set using the angles between the planes. So, the criterion is to find the minimum of the square of the difference of the corresponding angles. In this case, the correct answer should be the planes that have nearly the same orientation. I get the desired answer on the CPU. But when I send the data to the kernel, it returns a different answer that is not consistent with my calculations.

/*
 * For each work-item, take one reference entry of dot1 (selected indirectly
 * through permut1[gid]) and scan dot2[0 .. size2) for the entry with the
 * smallest squared Euclidean distance to it. The combo2 entry at the winning
 * index is converted to float4 and stored in trans[gid].
 *
 * Arguments, in the order the host must bind them with clSetKernelArg:
 *   0 permut1 - indices into dot1, one per work-item
 *   1 dot1    - reference float2 values
 *   2 combo1  - accepted but not read by this kernel
 *   3 dot2    - candidate float2 values searched by the loop
 *   4 combo2  - int4 payload associated with each dot2 entry
 *   5 size1   - accepted but not read by this kernel
 *   6 size2   - number of dot2 / combo2 entries to scan
 *   7 trans   - output, one float4 per work-item
 *
 * If size2 <= 0 the loop does not run and combo2[0] is used.
 */
__kernel void getTransformation(__global uint* permut1, __global float2* dot1, __global int4* combo1, __global float2* dot2, __global int4* combo2, int size1, int size2, __global float4* trans)
{
  int gid = get_global_id(0);

  /* The reference value does not depend on i, so load it once. */
  float2 ref_dot = dot1[permut1[gid]];

  float min_dot = FLT_MAX;
  int ind = 0;
  for (int i = 0; i < size2; i++)
  {
    /* A vector literal needs the (float2)(...) form; a bare (a, b) is a
       comma expression that would discard the x difference. */
    float2 temp_dot = (float2)(dot2[i].x - ref_dot.x, dot2[i].y - ref_dot.y);
    float dist_sq = temp_dot.x*temp_dot.x + temp_dot.y*temp_dot.y;
    if (dist_sq < min_dot)
    {
      min_dot = dist_sq;
      ind = i;
    }
  }

  /* OpenCL C has no implicit conversion between vector types, so the int4
     must go through an explicit convert_ call. */
  trans[gid] = convert_float4_rtp(combo2[ind]);
}
0 Likes
3 Replies

Hi,

   The problem may be because the vector data arrays you are passing to the kernel are not properly byte aligned. Could you tell us how you are setting arguments of the kernel and calling it from the host?

Thanks,

AMD Support

0 Likes


cl_mem d_permut1,d_combo1,d_combo2,d_dot1,d_dot2;

  // use about 1024 different combinations
  limit_size = 1024;

  // Host staging array for the subset of permut1 that is sent to the device.
  unsigned int* temp_permut = (unsigned int*)malloc(sizeof(unsigned int)*limit_size);

  // Device buffers. NOTE(review): each call overwrites err, so only the
  // status of the last clCreateBuffer survives to the checks below.
  d_permut1 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(unsigned int)*limit_size, NULL, &err);
  d_combo1 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint4)*size1, NULL, &err);
  d_dot1 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float2)*size1, NULL, &err);
  d_combo2 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_uint4)*size2, NULL, &err);
  d_dot2 = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float2)*size2, NULL, &err);
  d_trans   = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float4)*limit_size*4, NULL, &err);

  int j=0;

  // Transferring data starting from end: copy every 6th entry of permut1,
  // walking backwards, into consecutive slots of temp_permut.
  // NOTE(review): assumes size1 >= limit_size*6; otherwise i goes negative
  // and permut1 is read out of bounds - confirm against the caller.
  for(int i=size1-1;i>=size1-limit_size*6;i-=6,j++)
  {
    temp_permut[j] = permut1[i];
  }

  // Blocking writes (CL_TRUE): the host arrays may be reused once each call returns.
  err |= clEnqueueWriteBuffer(commands,d_permut1, CL_TRUE, 0, sizeof(unsigned int)*limit_size, temp_permut,0,NULL,NULL);
  err |= clEnqueueWriteBuffer(commands,d_combo1 , CL_TRUE, 0, sizeof(cl_uint4)*size1, combo1, 0, NULL, NULL);
  err |= clEnqueueWriteBuffer(commands,d_dot1   , CL_TRUE, 0, sizeof(cl_float2)*size1, dot1, 0, NULL, NULL);
  err |= clEnqueueWriteBuffer(commands,d_combo2 , CL_TRUE, 0, sizeof(cl_uint4)*size2, combo2, 0, NULL, NULL);
  err |= clEnqueueWriteBuffer(commands,d_dot2   , CL_TRUE, 0, sizeof(cl_float2)*size2, dot2, 0, NULL, NULL);

  clFinish(commands);

  // Argument indices must follow the kernel's parameter order:
  //   getTransformation(permut1, dot1, combo1, dot2, combo2, size1, size2, trans)
  // Binding the combo buffers at the dot slots (or vice versa) makes the
  // kernel reinterpret int4 data as float2 and produces garbage results.
  err |= clSetKernelArg(transKernel, 0, sizeof(cl_mem), (void*)&d_permut1);
  err |= clSetKernelArg(transKernel, 1, sizeof(cl_mem), (void*)&d_dot1);
  err |= clSetKernelArg(transKernel, 2, sizeof(cl_mem), (void*)&d_combo1);
  err |= clSetKernelArg(transKernel, 3, sizeof(cl_mem), (void*)&d_dot2);
  err |= clSetKernelArg(transKernel, 4, sizeof(cl_mem), (void*)&d_combo2);
  // NOTE(review): the buffers above are sized with size1/size2 but the kernel
  // receives pl1_size/pl2_size - confirm these hold the same element counts.
  err |= clSetKernelArg(transKernel, 5, sizeof(unsigned int), (void*)&pl1_size);
  err |= clSetKernelArg(transKernel, 6, sizeof(unsigned int), (void*)&pl2_size);
  err |= clSetKernelArg(transKernel, 7, sizeof(cl_mem), (void*)&d_trans);

  // One work-item per selected permutation, in work-groups of 64.
  size_t global_size[1] = {limit_size};
  size_t local_size[1] = {64};

  // Keep the launch status instead of discarding it, so a failed enqueue
  // (e.g. invalid arguments or work-group size) is visible in err.
  err |= clEnqueueNDRangeKernel(commands,transKernel, 1, NULL, global_size, local_size, 0, NULL, NULL);

  clFinish(commands);


I populate the dot and combo arrays in a previous kernel, and I have verified that they are populated correctly.

0 Likes

Hi,

Can you try rewriting your code without using vector data types and see if the same problem persists? This problem may be occurring due to misalignment of vector data types.

0 Likes