I wrote kernel for oil painting filter. Works for GPU, but for CPU i get messed up output. I can see shapes and colors, but it's very noisy.

__kernel void oil(__global uchar4 * in, __global uchar4 * out, const int brushsize) { const int radius = brushsize >> 1; int sizeX = get_global_size(0); int sizeY = get_global_size(1); int x = get_global_id(0); int y = get_global_id(1); uchar intensity, maxIntensity = 0, maks = 0; uchar4 intensities[256]; uint4 arrays[256]; float4 params = (float4) (0.2125f, 0.7154f, 0.0721f, 0.0f), temp; int t; uchar4 p; uint Index = y * sizeX + x; intensities[0].x = '\0'; arrays[0] = '\0'; for (int i = -radius; i <= radius; i++ ) { t = y + i; if (t < 0) continue; if (t >= sizeY) break; for (int j = -radius; j <= radius; j++) { t = x + j; if (t < 0) continue; if (t < sizeX) { p = (uchar4) in[Index + i * sizeX + j]; temp = params * convert_float4(p); intensity = (uchar) temp.x + temp.y + temp.z; intensities[intensity].x++; /*if(intensities[intensity].x > maks) { maxIntensity = intensity; maks = intensities[intensity].x; }*/ arrays[intensity] += (uint4) (p); } } } maxIntensity = 0; maks = 0; for (int i = 0; i < 256; i++ ) { if ( intensities[i].x > maks ) { maxIntensity = (uchar) i; maks = intensities[i].x; } } uint4 inttemp = (uint4) (intensities[maxIntensity].x, intensities[maxIntensity].x, intensities[maxIntensity].x, 1); out[Index] = (uchar4) (arrays[maxIntensity] / inttemp); }

Please post the complete code(a compilable test case). Its easy to track down and fix the problem that way.