I have this simple add loop which transforms a list using a +1 array index.
For an input list of 1,2,3,4,5,6,7,....
The output should be 2,3,4,5,6,7,1,....
If I use the ilt op it works correctly; however, if I use the ieq op it does not.
I get 2,3,4,5,6,7,7,......
What am I doing wrong?
Thanks,
Craig
// IL pixel-shader kernel (as C string fragments): walks index r1.x upward from 0 and,
// while r1 < l2 (6), swaps g[r1.x] with g[r1.x + 1]. The net effect on the first seven
// elements of g is a rotate-left by one (1,2,...,7 -> 2,...,7,1 when it works).
// This is the variant whose loop-exit test goes through the "ieq" result.
"il_ps_2_0\n"
// l0 = 0 (loop start / "false"), l1 = 1 (increment / "true"), l2 = 6 (loop bound) in .x
" dcl_literal l0, 0,0,0,0\n"
" dcl_literal l1, 1,0,0,0\n"
" dcl_literal l2, 6,0,0,0\n"
// r1 is the loop index
"mov r1, l0\n"
"whileloop\n"
// r3 = (r1 < 6)
"ilt r3, r1, l2\n"
// Materialise the comparison as an integer flag in r2: 0 when r3 is zero, 1 otherwise
"if_logicalz r3\n"
" mov r2, l0\n"
"else\n"
" mov r2, l1\n"
"endif\n"
// r3 = (r2 == 1); only .x of l0/l1 differs, the other components are 0 in both literals
"ieq r3, r2, l1\n"
//"ilt r3, r1, l2\n"
// Leave the loop once the flag is zero.
// NOTE(review): no component selector here, whereas the later listings use r3.x — confirm
// which component break_logicalz tests when none is given.
" break_logicalz r3\n"
// Swap g[r1.x] and g[r1.x + 1] through r4
" mov r4, g[r1.x]\n"
" iadd r5, r1, l1\n"
// NOTE(review): this mov both reads and writes g in a single instruction, and the next
// line writes g again; the temp-array variant of this kernel avoids that pattern.
" mov g[r1.x], g[r5.x]\n"
" mov g[r5.x], r4\n"
// r1 = r1 + 1
" iadd r1, r1, l1\n"
"endloop\n"
"ret_dyn\n"
"end\n"
Thanks for your quick response.
I think I must be missing something.
For r1 = 2, ilt r3, r1, l2 (TRUE)
if_logicalz (r3 = TRUE)
so, do else (FALSE)
mov r2, l1
ieq r3, r2 ,l1 (TRUE)
so don't break
This is only running in a single thread.
Thanks again,
Craig
I made a C program which I believe has the same logic.
It correctly prints: 2_3_4_5_6_7_1_8_9_10_11_12_
/*
 * Reference C model of the IL swap loop.
 * Fills c[] with 1..12, then for i = 0..5 swaps c[i] with c[i+1], which rotates
 * the first seven elements left by one. Expected output:
 *   2_3_4_5_6_7_1_8_9_10_11_12_
 * The [i] subscripts were missing from the posted text and are restored here.
 */
int i, j;
unsigned int c[12];
unsigned int tmp;
int var;

for (i = 0, j = 0; i < 12; i++, j++) c[i] = j + 1;

i = 0;
while (1) {
    /* Mirror the IL: turn the "<" comparison into a 0/1 flag first ... */
    if (i < 6) {
        var = 1;
    } else {
        var = 0;
    }
    /* ... then leave the loop when the flag is 0 (the IL "ieq" + break_logicalz pair). */
    if (var == 0) break;

    /* Swap neighbours c[i] and c[i+1]. */
    tmp = c[i];
    c[i] = c[i+1];
    c[i+1] = tmp;
    i++;
}

/* c[] is unsigned int, so print with %u. */
for (i = 0; i < 12; i++) printf("%u_", c[i]);
printf("\n");
If I comment out some of your move g[r10] statements,
I get different results.
orig code
2,3,4,5,6,7,7
with new comments
2,3,4,5,6,7,1 (ilt) correct
2,3,4,5,6,7,7 (ieq) incorrect
Yeah, you are right about the single-thread issue. I am just learning
the ISA.
Thanks for your help.
Craig
// Instrumented version of the swap kernel: same swap loop over g[r1.x] / g[r1.x + 1],
// plus debug stores of intermediate registers into g at offsets from r10.x.
// r10.x starts at 10 and advances by 10 per iteration, so each iteration's debug
// values land in their own group of g slots (g[15], g[16], g[19] on the first pass).
"il_ps_2_0\n"
" dcl_literal l0, 0,0,0,0\n"
" dcl_literal l1, 1,0,0,0\n"
" dcl_literal l2, 6,0,0,0\n"
// l3 = 10 in every component: initial value and per-iteration stride of the debug base r10
" dcl_literal l3, 10, 10, 10, 10\n"
// r1 = loop index, r10 = debug-store base
"mov r1, l0\n"
"mov r10, l3\n"
"whileloop\n"
// r3 = (r1 < 6)
"ilt r3, r1, l2\n"
//"mov g[r10.x + 1], r3\n"
// r2 = 0 when r3 is zero, otherwise 1
"if_logicalz r3\n"
" mov r2, l0\n"
//"mov g[r10.x + 2], r2\n"
"else\n"
" mov r2, l1\n"
//"mov g[r10.x + 3], r2\n"
"endif\n"
// As posted, the ieq result is immediately overwritten by the ilt on the next line,
// so this listing exits the loop on the ilt result; comment one of the two out to
// select the other test.
"ieq r3, r2, l1\n"
"ilt r3, r1, l2\n"
//"mov g[r10.x + 4], r3\n"
" break_logicalz r3.x\n"
// Debug stores: the loop condition and the l3 marker.
// NOTE(review): "r3.0" is an unusual selector — confirm it is the intended swizzle.
"mov g[r10.x + 5], r3.0\n"
"mov g[r10.x + 6], l3\n"
// Swap g[r1.x] and g[r1.x + 1] through r4
" mov r4, g[r1.x]\n"
" iadd r5, r1, l1\n"
" mov g[r1.x], g[r5.x]\n"
" mov g[r5.x], r4\n"
" iadd r1, r1, l1\n"
// Debug store of the incremented loop index, then advance the debug base by 10
"mov g[r10.x + 9], r1\n"
"iadd r10, r10, l3\n"
"endloop\n"
"ret_dyn\n"
"end\n"
If I copy the global to a temp array, do the operations on the temp array,
and then copy the temp array to global, I get the correct results for both ilt and ieq.
Thanks
Craig
// Temp-array variant of the swap kernel, in three phases:
//   1. copy g[0..6] into the indexed temp array x0,
//   2. run the swap loop on x0 instead of on g,
//   3. copy x0[0..6] back into g.
"il_ps_2_0\n"
"dcl_indexed_temp_array x0[48]\n"
// l0 = 0, l1 = 1, l2 = 6 (swap-loop bound), l3 = 7 (number of elements copied) in .x
" dcl_literal l0, 0,0,0,0\n"
" dcl_literal l1, 1,0,0,0\n"
" dcl_literal l2, 6,0,0,0\n"
" dcl_literal l3, 7,0,0,0\n"
// Phase 1: for (r1 = 0; r1 < 7; r1++) x0[r1] = g[r1]
"mov r1, l0\n"
"whileloop\n"
"ilt r3, r1, l3\n"
" break_logicalz r3.x\n"
"mov x0[r1.x], g[r1.x]\n"
" iadd r1, r1, l1\n"
"endloop\n"
// Phase 2: swap loop on x0, r1 restarting from 0.
"mov r1, l0\n"
// r10 is set here but not read again anywhere in this listing
"mov r10, l3\n"
"whileloop\n"
"ilt r3, r1, l2\n"
// r2 = 0/1 flag derived from r3; with the ieq below commented out, r2 is never read
"if_logicalz r3\n"
" mov r2, l0\n"
"else\n"
" mov r2, l1\n"
"endif\n"
//"ieq r3, r2, l1\n"
// As posted, the loop exits on the ilt result; swap the comment to test the ieq path
"ilt r3, r1, l2\n"
" break_logicalz r3.x\n"
// Swap x0[r1.x] and x0[r1.x + 1] through r4
" mov r4, x0[r1.x]\n"
" iadd r5, r1, l1\n"
" mov x0[r1.x], x0[r5.x]\n"
" mov x0[r5.x], r4\n"
" iadd r1, r1, l1\n"
"endloop\n"
// Phase 3: for (r1 = 0; r1 < 7; r1++) g[r1] = x0[r1]
"mov r1, l0\n"
"whileloop\n"
"ilt r3, r1, l3\n"
" break_logicalz r3.x\n"
"mov g[r1.x], x0[r1.x]\n"
" iadd r1, r1, l1\n"
"endloop\n"
"ret_dyn\n"
"end\n"
Thanks for the information. There is a lot to learn with gpu programming.
This was only an example of the issue I am seeing.
The full example comes from the output from my hobby gpu c like compiler I am making.
The compiler "<" construct uses the if conditional as in the example il code.
In this case the "<" construct does not work correctly. In other cases it is fine.
If I change to use the "ilt" instead of the "ieq" the compiled code works correctly.
" break_logicalz r15\n" -> ***** if changed to r14 "ilt" then works correctly ****
See below.
Again thanks for your help.
Craig
// Kernel source for the hobby compiler: fills e[0..11] with 1..12, then (for the single
// outer pass i = 0) swaps neighbouring elements for j = 0..5, rotating e[0..6] left by
// one, and finally returns the element selected by the pos index.
// The array subscripts on the two "e" assignments were missing from the posted text;
// they are restored to match the stores in the generated IL (x0[r0.x] and x0[r1.x]).
test(int rv<> out, int pos index)
{
    int i;
    int j;
    int t;
    int t1;
    int t2;
    int e[12];

    // e[i] = i + 1 for i = 0..11
    i = 0;
    while(i<12) {
        t1 = i + 1;
        e[i] = t1;
        i = i + 1;
    }

    i = 0;
    while(i < 1) {
        j = 0;
        while(j < 6) {
            t1 = j + 1;
            t2 = i + j;
            // swap: t2 equals j while i is 0, so this exchanges e[j] and e[j+1]
            t = e[t2];
            e[j] = e[t1];
            e[t1] = t;
            j = j + 1;
        }
        i = i + 1;
    }

    rv = e[pos];
}
// IL emitted by the hobby compiler for the test() kernel.
// Apparent register mapping, read off the matching source statements (verify against
// the compiler): r0 = i, r1 = j, r2 = t, r3 = t1, r4 = t2, x0 = e, v0.x = pos, o0 = rv.
// Every "while (a < b)" is lowered the same way: ilt -> if_logicalnz sets a 0/1 flag ->
// ieq flag, 1 -> break_logicalz on the ieq result.
"il_ps_2_0\n"
"dcl_output o0\n"
"dcl_input_position_interp(linear_noperspective) v0\n"
// Convert the interpolated position to integer, in place, for use as an array index
"ftoi v0, v0\n"
"dcl_indexed_temp_array x0[12]\n"
// i = 0
" dcl_literal l0, 0,0,0,0\n"
" mov r0, l0\n"
// while (i < 12): r5 is the 0/1 flag, r8 the raw ilt result, r9 the ieq result
"whileloop\n"
" dcl_literal l1, 0,0,0,0\n"
" mov r5, l1\n"
" dcl_literal l2, 12,0,0,0\n"
" ilt r8, r0, l2\n"
" if_logicalnz r8\n"
" dcl_literal l3, 1,0,0,0\n"
" mov r5, l3\n"
"endif\n"
" dcl_literal l4, 1,0,0,0\n"
" ieq r9, r5, l4\n"
" break_logicalz r9\n"
// t1 = i + 1; e[i] = t1
" dcl_literal l5, 1,0,0,0\n"
" iadd r10, r0, l5\n"
" mov r3, r10\n"
" mov x0[r0.x], r3\n"
// i = i + 1
" dcl_literal l6, 1,0,0,0\n"
" iadd r11, r0, l6\n"
" mov r0, r11\n"
"endloop\n"
// i = 0
" dcl_literal l7, 0,0,0,0\n"
" mov r0, l7\n"
// while (i < 1): r6 flag, r12 ilt result, r13 ieq result
"whileloop\n"
" dcl_literal l8, 0,0,0,0\n"
" mov r6, l8\n"
" dcl_literal l9, 1,0,0,0\n"
" ilt r12, r0, l9\n"
" if_logicalnz r12\n"
" dcl_literal l10, 1,0,0,0\n"
" mov r6, l10\n"
"endif\n"
" dcl_literal l11, 1,0,0,0\n"
" ieq r13, r6, l11\n"
" break_logicalz r13\n"
// j = 0
" dcl_literal l12, 0,0,0,0\n"
" mov r1, l12\n"
// while (j < 6): r7 flag, r14 ilt result, r15 ieq result
"whileloop\n"
" dcl_literal l13, 0,0,0,0\n"
" mov r7, l13\n"
" dcl_literal l14, 6,0,0,0\n"
" ilt r14, r1, l14\n"
" if_logicalnz r14\n"
" dcl_literal l15, 1,0,0,0\n"
" mov r7, l15\n"
"endif\n"
" dcl_literal l16, 1,0,0,0\n"
" ieq r15, r7, l16\n"
// The inner-loop exit below is the instruction the poster reports as misbehaving:
// breaking on r14 (the raw ilt result) instead of r15 gives the expected output.
" break_logicalz r15\n" -> ***** if changed to r14 "ilt" then works correctly ****
// t1 = j + 1
" dcl_literal l17, 1,0,0,0\n"
" iadd r16, r1, l17\n"
" mov r3, r16\n"
// t2 = i + j
" iadd r17, r0, r1\n"
" mov r4, r17\n"
// t = e[t2]; e[j] = e[t1]; e[t1] = t
" mov r2, x0[r4.x]\n"
" mov x0[r1.x], x0[r3.x]\n"
" mov x0[r3.x], r2\n"
// j = j + 1
" dcl_literal l18, 1,0,0,0\n"
" iadd r18, r1, l18\n"
" mov r1, r18\n"
"endloop\n"
// i = i + 1
" dcl_literal l19, 1,0,0,0\n"
" iadd r19, r0, l19\n"
" mov r0, r19\n"
"endloop\n"
// rv = e[pos]
" mov o0, x0[v0.x]\n"
"ret_dyn\n"
"end\n"