Hello,
I wrote the IL kernel below, which multiplies the input in uav0 by 2 and writes the result back to uav0. It works fine as shown. However, when I uncomment the two lines that use the LDS (`lds_store_id` / `lds_load_id`), I get wrong results. What could be wrong? Is LDS memory byte-addressable? Could this be a consistency/coherence problem?
Thank you.
// AMD IL compute kernel: every thread reads ONE dword from UAV 0, doubles it
// and writes it back to UAV 0.
//
//   per-thread byte offset : vTidInGrp.x * 4          (r2.x)
//   load address           : cb0[0].x + r2.x          (r4.x)
//   store address          : cb0[1].x + r2.x          (r8.x)
//
// Because the per-thread stride is 4 bytes, each thread owns exactly one dword.
// The load, the multiply, the optional LDS round trip and the store are
// therefore restricted to the .x lane. Moving a full xyzw register (16 bytes)
// at a 4-byte stride makes neighbouring threads overlap each other's elements
// and lets the highest-numbered threads touch 12 bytes past the last element.
// That only appeared to work while every overlapping write carried the same
// value.
//
// NOTE(review): lds_store_id / lds_load_id are understood to transfer a single
// dword addressed by a byte offset that must be dword-aligned -- confirm
// against the AMD IL specification. With a 4-wide UAV store the yzw lanes
// would not have made the LDS round trip, which matches the wrong results seen
// when the two LDS lines were enabled. Each thread reads back only the LDS
// slot it wrote itself, so no fence is needed for this pattern.
"il_cs_2_0\n"
"dcl_max_thread_per_group 256\n"
"dcl_raw_uav_id(0)\n"
"dcl_lds_id(1) 32768\n"
"dcl_cb cb0[2]\n"
"dcl_literal l0, 4, 4, 4, 4\n"
"dcl_literal l1, 2, 2, 2, 2\n"
"mov r0, vTidInGrp.x\n"
"mov r1, r0\n"
// r2 = thread index * 4 -> byte offset of this thread's dword.
"imul r2, r1, l0\n"
"mov r3, cb0[0]\n"
"iadd r4, r3, r2\n"
// Load only this thread's dword (was a 4-dword load into r5.xyzw).
"uav_raw_load_id(0) r5.x___, r4.x\n"
"imul r6.x___, r5.x, l1.x\n"
// Optional LDS round trip; scalar form so it matches the scalar UAV store below.
//"lds_store_id(1) r2.x, r6.x\n"
//"lds_load_id(1) r6.x___, r2.x\n"
"mov r7, cb0[1]\n"
"iadd r8, r7, r2\n"
// Store only this thread's dword (was mem.xyzw, i.e. 16 bytes per thread).
"uav_raw_store_id(0) mem.x___, r8.x, r6.x\n"
"end\n";