Hi,
I'm running many iterations of a compute shader and reading / writing values to memory using imageLoad / imageStore.
Calling glMemoryBarrier after every iteration should ensure that memory accesses are synchronized between compute dispatches. This works fine with pixel (fragment) shaders, but not with compute shaders.
Shader:
#version 430
// 8x8 invocations per work group; each invocation handles one texel.
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
// RGBA32F rectangle image at image unit 0, both read and written below.
layout(binding = 0, rgba32f) uniform image2DRect data;

// Increments every component of this invocation's texel by 1.
void main() {
    // gl_GlobalInvocationID is gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID,
    // i.e. the texel coordinate for the (8, 8, 1) work group size declared above.
    ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
    vec4 current = imageLoad(data, texel);
    imageStore(data, texel, current + vec4(1));
}
Program:
float* data = new float[16 * 16 * 4];
memset(data, 0, 16 * 16 * 4 * sizeof(float));
glGenTextures(1, &image);
glBindTexture(GL_TEXTURE_RECTANGLE, image);
glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA32F, 16, 16, 0, GL_RGBA, GL_FLOAT, data);
glBindTexture(GL_TEXTURE_RECTANGLE, 0);
glUseProgram(program);
glBindImageTexture(0, image, 0, false, 0, GL_READ_WRITE, GL_RGBA32F);
for (int i = 0; i < 99; ++i) {
glDispatchCompute(2, 2, 1);
glMemoryBarrier(GL_ALL_BARRIER_BITS);
}
glBindTexture(GL_TEXTURE_RECTANGLE, image);
glGetTexImage(GL_TEXTURE_RECTANGLE, 0, GL_RGBA, GL_FLOAT, data);
for (int y = 0; y < 16; ++y) {
for (int x = 0; x < 16; ++x) {
int i = y * 16 * 4 + x * 4;
cout << data << " ";
}
cout << endl;
}
Results (texel values):
Expected: 99
On HD 5770: 99
On HD 7850: 24
On HD 7970: 13
When using glFlush() every iteration: 99 (all cards)
Tested with Catalyst 13.6 beta / 13.8 beta