int int_mod( int a, int b);
__kernel void compare_string( __global char* input,__global char* output, const unsigned int count )
{
//this kernel is based upon the idea that the data is layout in collumns rather that rows
//this will result in a coalesed acces pattern
//to acces a read use the get_global_id() function, each subsequent char from the read is stored with an offset of 768 bytes
//get global_id
int x = get_global_id(0);
if( x <= count )
{
//some vars,
int i=0;
int hash0=0;
int hash_current=0;
for(i=0;i<PATTERN;i++)
{
hash0 = int_mod( hash0 * BASE + input[x+768*i] , PRIME );
}
output = (int)hash0;
//printf("|%d,%d|",x,output);
//calculated the first hash, now start the roling hash for this read
hash_current = hash0;
int E = (BASE * PATTERN -1) % PRIME;
for(i=PATTERN;i<768-PATTERN;i++)
{
hash_current = int_mod( hash_current - int_mod( input[x+768*(i-PATTERN)] * E, PRIME),PRIME);
hash_current = int_mod(hash_current*BASE, PRIME);
hash_current = int_mod(hash_current + input[x+768*i],PRIME);
output[x+(i-PATTERN+1)*count] = hash_current;
//printf("|%d,%d|",x+(i-PATTERN+1)*count,hash_current);
}
}
}
int int_mod( int a, int b)
{
return (a%b +b)%b;
}
I have the above kernel. With printf i verified if the hash output is correct.
after the buffers are initialized and the kernel is started i try to retrieve the output,
strange enough i get weird results. I have no clue what i 'm doing wrong.
cl::Buffer inputBuffer ( context , CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR , sizeof(cl_char)*MAX_READ_LENGTH*count*2, input_datar);
cl::Buffer resultBuffer( context , CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof (cl_int )*count*1000, output_data );
cl::Buffer index( context , CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR , sizeof(cl_int), p );
cmdqueue.finish();
//starting kernel
cl::Kernel kernel(program , "compare_string" ) ;
cmdqueue.finish();
//range x) from 0->#reads
cl::KernelFunctor kernel_func = kernel.bind( cmdqueue ,cl::NDRange( (int)((count/64)+1)*64), cl::NDRange(64) );
cmdqueue.finish();
//executing kernel
kernel_func(inputBuffer, resultBuffer, count);
cmdqueue.finish();
//data retrieval ?
cmdqueue.enqueueReadBuffer( resultBuffer , true , 0 , sizeof(cl_int)*count*1000, output_data);
cmdqueue.finish();
for(i=0;i<500;i++)
{
printf("%d\n",output_data);
}
The last for loop in the main program outputs something like :
2040812460
-854044569
377289699
-2048191384
-1881616239
1121052001
-1605645626
687816837
-635997106
35425081
-871176712
....
What am i doing wrong ?