You are right, sorry - the problem is somwhere deeper. Anyway, I gradually reduced my working kernel into this simpliest one - it securely hangs 15.7.1 Catalyst on HD5770 (last available for this device). Now I just can't find the real cause of the problem - any further changes to the code make it alive again. I know, it looks strange now, with unnecessary local mem barrier there and other, but it shouldn't hang the runtime anyway, how do you think? The kernel source code (too many arguments are there - you can wipe unnecessary out, I think):
__constant sampler_t imageSampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
__kernel __attribute__((reqd_work_group_size(256, 1, 1))) void faultyKernel(
uint gentime,
__read_only image2d_t config,
__read_only image2d_t info,
__read_only image2d_t consts,
__read_only image2d_t olddata,
__read_only image2d_t middata,
__read_only image2d_t newdata,
__write_only image2d_t output,
__write_only image2d_t stats,
__global uint * adata,
__global uint * bdata,
__global uint * cdata,
__global uint * ddata)
{
uint w = get_local_id(0);
uint g = get_global_id(1);
uint vui;
uint4 vui4;
int2 coord;
int i;
adata = 0;
if (w < 😎 bdata = 0;
if (g + 1 != 0) {
if (w < 😎 {
coord.y = 0;
coord.x = w;
vui4 = read_imageui(newdata, imageSampler, coord);
cdata = vui4.z;
}
}
if (g + 1 != 0) {
barrier(CLK_LOCAL_MEM_FENCE);
//vui4 = (uint4)(0, 0, 0, 0);
for (i = 0; i < 4; i++) {
vui = 5;
vui4 = (uint4) (vui4.yzw, vui);
}
}
}