When I execute the code below and then examine the values in spin_out (by reading the buffer back to the host), they are equal to the value of c4, regardless of what I set c4 to, and regardless of the fact that the last thing the kernel does is set spin_out to 0.
Needless to say, I'm baffled at this point.
/*
 * Advance four independent linear congruential generator states by one step.
 *
 * Each lane of the result is 1664525 * seed + 1013904223, evaluated
 * component-wise in unsigned integer arithmetic.
 *
 * seed: the four current generator states.
 * Returns the four next generator states.
 */
inline uint4 newSeed(uint4 seed)
{
    /* A single-scalar vector literal replicates the value into every lane. */
    const uint4 multiplier = (uint4)(1664525U);
    const uint4 increment = (uint4)(1013904223U);

    return multiplier * seed + increment;
}

/*
 * For the element addressed by this work-item, step its seed and then
 * clear its entry in spin_out.
 *
 * spin:            not referenced by this kernel body.
 * spin_out:        output buffer; the addressed element is set to all zeros.
 * seeds:           generator states; the addressed element is replaced by
 *                  newSeed() of its current value.
 * ROWS:            not referenced by this kernel body.
 * COLS:            forwarded to index() as its third argument.
 * NUM_SPIN_STATES: not referenced by this kernel body.
 *
 * NOTE(review): index() is defined outside this snippet; presumably it
 * flattens (row, col) into a linear offset -- confirm against its definition.
 * NOTE(review): there is no range check on the work-item ids, so this assumes
 * the launch range never exceeds the buffer extents -- confirm on the host.
 */
__kernel void the_kernel(__global int4 *spin,
                         __global int4 *spin_out,
                         __global uint4 *seeds,
                         __const uint ROWS,
                         __const uint COLS,
                         __const uint NUM_SPIN_STATES)
{
    /* Dimension 0 of the global range supplies the row, dimension 1 the column. */
    const int r = get_global_id(0);
    const int c = get_global_id(1);
    const int cell = index(r, c, COLS);

    /* Seed update first, output store last: same order as the original. */
    seeds[cell] = newSeed(seeds[cell]);
    spin_out[cell] = (int4)(0);
}