cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

AndreiB
Journeyman III

IL Compiler Hangs...

When using CAL 1.1-beta, the compiler hangs when trying to compile my IL program.

I can't find any bug tracking system in Developer's Zone, where am I supposed to report bugs/send repros?

Also, do you have some kind of 'early access program' for developers who are willing to use a pre-release compiler/driver, or do we have to wait until the next (beta) release?

Thanks in advance!
0 Likes
8 Replies

Please message me the Shader and I will debug it and get back to you.
0 Likes

Thanks for reply.

I've simplified code, so I can post it here.

Note that if you comment out the line mov g[r2.x+1].x, r12.x then the shader compiles fine, so the workaround is simple. I don't know if it is legal to provide an immediate offset with g[], but this shouldn't hang the compiler anyway 😃

; Reduced repro for the reported IL compiler hang (CAL 1.1-beta).
; According to the poster, commenting out the final store
; `mov g[r2.x+1].x, r12.x` lets the shader compile.
il_ps_2_0
dcl_input vObjIndex0
; NOTE(review): l0-l4 match the SHA-1 initial hash words and l5-l8 the SHA-1
; round constants - presumably left over from the full kernel; confirm.
; l5-l8 are declared but not referenced by any instruction below.
dcl_literal l0, 0x67452301, 0x00000000, 0x00000000, 0x00000000
dcl_literal l1, 0xefcdab89, 0x00000000, 0x00000000, 0x00000000
dcl_literal l2, 0x98badcfe, 0x00000000, 0x00000000, 0x00000000
dcl_literal l3, 0x10325476, 0x00000000, 0x00000000, 0x00000000
dcl_literal l4, 0xc3d2e1f0, 0x00000000, 0x00000000, 0x00000000
dcl_literal l5, 0x5a827999, 0x00000000, 0x00000000, 0x00000000
dcl_literal l6, 0x6ed9eba1, 0x00000000, 0x00000000, 0x00000000
dcl_literal l7, 0x8f1bbcdc, 0x00000000, 0x00000000, 0x00000000
dcl_literal l8, 0xca62c1d6, 0x00000000, 0x00000000, 0x00000000
; Resource 0: 1D, unnormalized coordinates, uint in all four channels.
dcl_resource_id(0)_type(1d,unnorm)_fmtx(uint)_fmty(uint)_fmtz(uint)_fmtw(uint)
; l9-l40 hold the integers 0..31 in .x (other components zero).
; Only l11 (= 2) and l13 (= 4) are referenced by the instructions below.
dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000
dcl_literal l10, 0x00000001, 0x00000000, 0x00000000, 0x00000000
dcl_literal l11, 0x00000002, 0x00000000, 0x00000000, 0x00000000
dcl_literal l12, 0x00000003, 0x00000000, 0x00000000, 0x00000000
dcl_literal l13, 0x00000004, 0x00000000, 0x00000000, 0x00000000
dcl_literal l14, 0x00000005, 0x00000000, 0x00000000, 0x00000000
dcl_literal l15, 0x00000006, 0x00000000, 0x00000000, 0x00000000
dcl_literal l16, 0x00000007, 0x00000000, 0x00000000, 0x00000000
dcl_literal l17, 0x00000008, 0x00000000, 0x00000000, 0x00000000
dcl_literal l18, 0x00000009, 0x00000000, 0x00000000, 0x00000000
dcl_literal l19, 0x0000000a, 0x00000000, 0x00000000, 0x00000000
dcl_literal l20, 0x0000000b, 0x00000000, 0x00000000, 0x00000000
dcl_literal l21, 0x0000000c, 0x00000000, 0x00000000, 0x00000000
dcl_literal l22, 0x0000000d, 0x00000000, 0x00000000, 0x00000000
dcl_literal l23, 0x0000000e, 0x00000000, 0x00000000, 0x00000000
dcl_literal l24, 0x0000000f, 0x00000000, 0x00000000, 0x00000000
dcl_literal l25, 0x00000010, 0x00000000, 0x00000000, 0x00000000
dcl_literal l26, 0x00000011, 0x00000000, 0x00000000, 0x00000000
dcl_literal l27, 0x00000012, 0x00000000, 0x00000000, 0x00000000
dcl_literal l28, 0x00000013, 0x00000000, 0x00000000, 0x00000000
dcl_literal l29, 0x00000014, 0x00000000, 0x00000000, 0x00000000
dcl_literal l30, 0x00000015, 0x00000000, 0x00000000, 0x00000000
dcl_literal l31, 0x00000016, 0x00000000, 0x00000000, 0x00000000
dcl_literal l32, 0x00000017, 0x00000000, 0x00000000, 0x00000000
dcl_literal l33, 0x00000018, 0x00000000, 0x00000000, 0x00000000
dcl_literal l34, 0x00000019, 0x00000000, 0x00000000, 0x00000000
dcl_literal l35, 0x0000001a, 0x00000000, 0x00000000, 0x00000000
dcl_literal l36, 0x0000001b, 0x00000000, 0x00000000, 0x00000000
dcl_literal l37, 0x0000001c, 0x00000000, 0x00000000, 0x00000000
dcl_literal l38, 0x0000001d, 0x00000000, 0x00000000, 0x00000000
dcl_literal l39, 0x0000001e, 0x00000000, 0x00000000, 0x00000000
dcl_literal l40, 0x0000001f, 0x00000000, 0x00000000, 0x00000000
; r2 = object index broadcast to all four components.
mov r2, vObjIndex0.xxxx
; r0.x = index * 4, converted to float for use as the sample coordinate.
umul r0.x, r2, l13
utof r3, r0.x
; Four fetches from resource 0 at immediate offsets 0..3 into r4-r7.
; r4-r7 are not read again in this reduced snippet.
sample_resource(0)_sampler(0)_aoffimmi( 0.0, 0.0, 0.0 ) r4, r3.x000
sample_resource(0)_sampler(0)_aoffimmi( 1.0, 0.0, 0.0 ) r5, r3.x000
sample_resource(0)_sampler(0)_aoffimmi( 2.0, 0.0, 0.0 ) r6, r3.x000
sample_resource(0)_sampler(0)_aoffimmi( 3.0, 0.0, 0.0 ) r7, r3.x000
; Copy the five constants l0-l4 into r13-r17.
mov r13.x, l0.x
mov r14.x, l1.x
mov r15.x, l2.x
mov r16.x, l3.x
mov r17.x, l4.x
; Five scalar adds producing r8.x-r12.x.
; NOTE(review): r18.x is read here but never written in this snippet, and
; r16.x is written above but never read - likely an artifact of reducing the
; original kernel; left untouched because this is the exact failing input.
iadd r8.x, l0.x, r17.x
iadd r9.x, l1.x, r18.x
iadd r10.x, l2.x, r13.x
iadd r11.x, l3.x, r14.x
iadd r12.x, l4.x, r15.x
; r2.x = index * 2: each work item writes two consecutive g[] entries.
umul r2, r2, l11
; Component-wise scalar stores to g[]; the last one (immediate offset +1)
; is the line the poster identifies as triggering the hang.
mov g[r2.x+0].x, r8.x
mov g[r2.x+0].y, r9.x
mov g[r2.x+0].z, r10.x
mov g[r2.x+0].w, r11.x
mov g[r2.x+1].x, r12.x


ret_dyn
end
0 Likes

AndreiB,
I have reproduced the issue and reported the problem to the correct team. It seems to be handling the scalar assignment incorrectly; maybe you can try using vectors instead of scalars, as pasted below?
; Suggested workaround for the compiler hang: same shader as the poster's
; repro, but the first four results are packed into r8.xyzw and written to
; g[] with a single vector store instead of four scalar stores.
il_ps_2_0
dcl_input vObjIndex0
; NOTE(review): l0-l4 match the SHA-1 initial hash words and l5-l8 the SHA-1
; round constants - presumably left over from the full kernel; confirm.
; l5-l8 are declared but not referenced by any instruction below.
dcl_literal l0, 0x67452301, 0x00000000, 0x00000000, 0x00000000
dcl_literal l1, 0xefcdab89, 0x00000000, 0x00000000, 0x00000000
dcl_literal l2, 0x98badcfe, 0x00000000, 0x00000000, 0x00000000
dcl_literal l3, 0x10325476, 0x00000000, 0x00000000, 0x00000000
dcl_literal l4, 0xc3d2e1f0, 0x00000000, 0x00000000, 0x00000000
dcl_literal l5, 0x5a827999, 0x00000000, 0x00000000, 0x00000000
dcl_literal l6, 0x6ed9eba1, 0x00000000, 0x00000000, 0x00000000
dcl_literal l7, 0x8f1bbcdc, 0x00000000, 0x00000000, 0x00000000
dcl_literal l8, 0xca62c1d6, 0x00000000, 0x00000000, 0x00000000
; Resource 0: 1D, unnormalized coordinates, uint in all four channels.
dcl_resource_id(0)_type(1d,unnorm)_fmtx(uint)_fmty(uint)_fmtz(uint)_fmtw(uint)
; l9-l40 hold the integers 0..31 in .x (other components zero).
; Only l11 (= 2) and l13 (= 4) are referenced by the instructions below.
dcl_literal l9, 0x00000000, 0x00000000, 0x00000000, 0x00000000
dcl_literal l10, 0x00000001, 0x00000000, 0x00000000, 0x00000000
dcl_literal l11, 0x00000002, 0x00000000, 0x00000000, 0x00000000
dcl_literal l12, 0x00000003, 0x00000000, 0x00000000, 0x00000000
dcl_literal l13, 0x00000004, 0x00000000, 0x00000000, 0x00000000
dcl_literal l14, 0x00000005, 0x00000000, 0x00000000, 0x00000000
dcl_literal l15, 0x00000006, 0x00000000, 0x00000000, 0x00000000
dcl_literal l16, 0x00000007, 0x00000000, 0x00000000, 0x00000000
dcl_literal l17, 0x00000008, 0x00000000, 0x00000000, 0x00000000
dcl_literal l18, 0x00000009, 0x00000000, 0x00000000, 0x00000000
dcl_literal l19, 0x0000000a, 0x00000000, 0x00000000, 0x00000000
dcl_literal l20, 0x0000000b, 0x00000000, 0x00000000, 0x00000000
dcl_literal l21, 0x0000000c, 0x00000000, 0x00000000, 0x00000000
dcl_literal l22, 0x0000000d, 0x00000000, 0x00000000, 0x00000000
dcl_literal l23, 0x0000000e, 0x00000000, 0x00000000, 0x00000000
dcl_literal l24, 0x0000000f, 0x00000000, 0x00000000, 0x00000000
dcl_literal l25, 0x00000010, 0x00000000, 0x00000000, 0x00000000
dcl_literal l26, 0x00000011, 0x00000000, 0x00000000, 0x00000000
dcl_literal l27, 0x00000012, 0x00000000, 0x00000000, 0x00000000
dcl_literal l28, 0x00000013, 0x00000000, 0x00000000, 0x00000000
dcl_literal l29, 0x00000014, 0x00000000, 0x00000000, 0x00000000
dcl_literal l30, 0x00000015, 0x00000000, 0x00000000, 0x00000000
dcl_literal l31, 0x00000016, 0x00000000, 0x00000000, 0x00000000
dcl_literal l32, 0x00000017, 0x00000000, 0x00000000, 0x00000000
dcl_literal l33, 0x00000018, 0x00000000, 0x00000000, 0x00000000
dcl_literal l34, 0x00000019, 0x00000000, 0x00000000, 0x00000000
dcl_literal l35, 0x0000001a, 0x00000000, 0x00000000, 0x00000000
dcl_literal l36, 0x0000001b, 0x00000000, 0x00000000, 0x00000000
dcl_literal l37, 0x0000001c, 0x00000000, 0x00000000, 0x00000000
dcl_literal l38, 0x0000001d, 0x00000000, 0x00000000, 0x00000000
dcl_literal l39, 0x0000001e, 0x00000000, 0x00000000, 0x00000000
dcl_literal l40, 0x0000001f, 0x00000000, 0x00000000, 0x00000000
; r2 = object index broadcast to all four components.
mov r2, vObjIndex0.xxxx
; r0.x = index * 4, converted to float for use as the sample coordinate.
umul r0.x, r2, l13
utof r3, r0.x
; Four fetches from resource 0 at immediate offsets 0..3 into r4-r7.
; r4-r7 are not read again in this reduced snippet.
sample_resource(0)_sampler(0)_aoffimmi( 0.0, 0.0, 0.0 ) r4, r3.x000
sample_resource(0)_sampler(0)_aoffimmi( 1.0, 0.0, 0.0 ) r5, r3.x000
sample_resource(0)_sampler(0)_aoffimmi( 2.0, 0.0, 0.0 ) r6, r3.x000
sample_resource(0)_sampler(0)_aoffimmi( 3.0, 0.0, 0.0 ) r7, r3.x000
; Copy the five constants l0-l4 into r13-r17.
mov r13.x, l0.x
mov r14.x, l1.x
mov r15.x, l2.x
mov r16.x, l3.x
mov r17.x, l4.x
; The first four sums go into the components of r8; the fifth into r9.x.
; NOTE(review): r18.x is read here but never written in this snippet, and
; r16.x is written above but never read - carried over from the repro.
iadd r8.x, l0.x, r17.x
iadd r8.y, l1.x, r18.x
iadd r8.z, l2.x, r13.x
iadd r8.w, l3.x, r14.x
iadd r9.x, l4.x, r15.x
; r2.x = index * 2: each work item writes two consecutive g[] entries.
umul r2, r2, l11
; One full-vector store replaces the four scalar stores of the repro;
; the fifth value is still stored as a scalar at immediate offset +1.
mov g[r2.x+0], r8
mov g[r2.x+1].x, r9.x


ret_dyn
end

Remember, Radeon graphics cards are 5-way scalar machines, so working in vectors will be more efficient than working with scalars.
0 Likes

Originally posted by: MicahVillmow
Remember, Radeon graphics cards are 5-way scalar machines, so working in vectors will be more efficient than working with scalars.


Yes, I know that. Currently I'm doing some research and code is proof-of-concept. Later production code will definitely use vectors.

Some more questions:

1. Is it possible to have several shaders in one .il file? I.e. now I specify "main" as entry point, do I have some way to specify some other name?

2. [Question from original post] What about 'early access program' for developers? I.e. access to pre-release tools and SDK as well as hardware? I guess this should be discussed with local AMD/ATI office, so maybe you can PM me email address of someone from Russian office who's responsible for GPGPU?

Thanks in advance.
0 Likes

I also get IL compiler hangs with the following innocent-looking piece of code:

 

// Repro kernel for an IL compiler hang reported by the poster.
// Declares a 30-element double2 output array and zeroes only its first
// seven elements (gamma[0]..gamma[6]); elements 7..29 are never written.
kernel void foo(out double2 gamma[30])
{
    gamma[0] = double2(0,0);
    gamma[1] = double2(0,0);
    gamma[2] = double2(0,0);
    gamma[3] = double2(0,0);
    gamma[4] = double2(0,0);
    gamma[5] = double2(0,0);
    gamma[6] = double2(0,0);
}

 

0 Likes


Dear Andrei,

I'm not sure if this is quite what you had in mind for your point 1, but what you can do is to associate multiple modules each with a different image to the same context, with each module connected e.g. to the same buffers but with a different entry CALfunc. Then you just run each program in turn (though to get the correct result you seem to need to explicitly either flush or wait for completion between each one).

So something like...

/*
 * Illustrative fragment: load two compiled images as separate modules in the
 * same context, bind both to the same constant and global buffers, then run
 * their entry points one after the other.
 *
 * ctx, image1, image2, constMem, globMem, e, domain1 and domain2 are not
 * declared in this fragment; they are assumed to be set up elsewhere.
 * Return codes of the CAL calls are not checked here.
 */

/* Module 1: load image1 and look up its entry point and buffer names. */
CALmodule module1 = 0;
calModuleLoad(&module1, ctx, image1);
CALfunc func1 = 0;

CALname constName1=0;
CALname globName1=0;

calModuleGetEntry(&func1, ctx, module1, "main");
calModuleGetName(&constName1, ctx, module1, "cb0");
calModuleGetName(&globName1,ctx,module1,"g[]");

/* Bind module 1's "cb0" and "g[]" names to the shared memory handles. */
calCtxSetMem(ctx, constName1, constMem);
calCtxSetMem(ctx,globName1,globMem);


/* Module 2: same steps for image2; its entry point is also named "main". */
CALmodule module2 = 0;
calModuleLoad(&module2, ctx, image2);
CALfunc func2 = 0;

CALname constName2=0;
CALname globName2=0;

calModuleGetEntry(&func2, ctx, module2, "main");
calModuleGetName(&constName2, ctx, module2, "cb0");
calModuleGetName(&globName2,ctx,module2,"g[]");

/* Module 2 is bound to the same constMem / globMem as module 1. */
calCtxSetMem(ctx, constName2, constMem);
calCtxSetMem(ctx,globName2,globMem);

...

/* Run func1 and busy-wait until its event is no longer pending, so that
   func2 starts only after func1 has completed. */
calCtxRunProgram(&e, ctx, func1, &domain1);
while (calCtxIsEventDone(ctx, e) == CAL_RESULT_PENDING);e=0;

/* Same run-and-wait sequence for the second program. */
calCtxRunProgram(&e, ctx, func2, &domain2);
while (calCtxIsEventDone(ctx, e) == CAL_RESULT_PENDING);e=0;

...


See also the potentially related thread here.

Best,
Steven.
0 Likes

Hi,

The code below crashes IL compiler:

; Minimal crash repro: the poster describes this as invalid IL that makes the
; IL compiler segfault inside calclCompile instead of reporting an error.
il_ps_2_0
dcl_input_interp(linear) r0.xy
end;

While it is an illegal piece of code, the compiler should report an error rather than segfault.

Valgrind output shows that this is somewhere in libamdcalcl.so:

==8691== Process terminating with default action of signal 11 (SIGSEGV)
==8691==  Access not within mapped region at address 0x62
==8691==    at 0x6634800: (within /usr/local/amdcal/lib64/libamdcalcl.so)
==8691==    by 0x660E8A3: (within /usr/local/amdcal/lib64/libamdcalcl.so)
==8691==    by 0x66191C7: (within /usr/local/amdcal/lib64/libamdcalcl.so)
==8691==    by 0x6618B66: (within /usr/local/amdcal/lib64/libamdcalcl.so)
==8691==    by 0x661A39D: (within /usr/local/amdcal/lib64/libamdcalcl.so)
==8691==    by 0x674B682: (within /usr/local/amdcal/lib64/libamdcalcl.so)
==8691==    by 0x65EAC45: (within /usr/local/amdcal/lib64/libamdcalcl.so)
==8691==    by 0x65E4A07: (within /usr/local/amdcal/lib64/libamdcalcl.so)
==8691==    by 0x65E5756: calclCompile (in /usr/local/amdcal/lib64/libamdcalcl.so)
==8691==    by 0x5E31EB1: CalCode_init (code.c:51)
==8691==    by 0x45CA80: type_call (typeobject.c:436)
==8691==    by 0x417E32: PyObject_Call (abstract.c:1861)
==8691==

Test platform: 64-bit Ubuntu 8.04 + CAL SDK 1.1.

I won't demand bug tracking page as I did it earlier, but this would be a Right Thing (TM) anyway

 

One more question: it seems that calclGetErrorString() returns "Operational Error" for all encountered errors. Is it possible to get the exact error message (and its line number)? Or am I doing something wrong?

Regards,

rle

 

0 Likes

rafal.lewczuk,
The IL Compiler does absolutely no error checking and has undefined behavior on invalid IL. If you write at the IL level, then you need to guarantee that what you write is correct. If the higher level compilers are generating invalid IL, then it needs to be reported.
0 Likes