I am having a lot of trouble keeping the compiler from crashing.
It's impossible to optimize any code when most changes cause
the compiler to crash. In one case, i%9 causes a crash, but i+9
does not. i = i - (i/9)*9 also causes a segmentation fault. In the
end, I had to use an if statement to get around this. i%9 works
fine if I rearrange other portions of the code. I was not able to
produce a test case for the above problem, but I do have one for
another crash case. See below.
Setup:
Debian Wheezy
4 HD-7970 GPUs.
amdcodexl_1.3.3487-1_amd64.deb
amd-catalyst-13.11-beta-v9.4-linux-x86.x86_64.run.zip
AMD-APP-SDK-v2.9-lnx64.tgz
AMD_CodeXL_Linux_x86_64_1.3.3487.tar.gz
Kernels:
I am compiling the kernels using CodeXL in analyze mode. I don't suppose
a command-line compiler exists?
This kernel crashes the compiler.
/*
 * Reproduction kernel: loop structure only, no stores and no loop bodies.
 *
 * Each work-item reads one 32-bit element from g_msw and one from g_lsw at
 * its global id, combines them into a 64-bit starting value, and then walks
 * 1<<16 consecutive 64-bit values with an empty inner loop of 8 iterations.
 *
 * The outer loop counter and its bound are both unsigned long (64-bit in
 * OpenCL C), so the loop condition is a 64-bit comparison.
 * NOTE(review): presumably that 64-bit compare is what the compiler fails
 * to select -- not confirmed; keep the code exactly as written.
 */
__kernel void Crash(__global unsigned int *g_msw, __global unsigned int *g_lsw)
{
size_t g_work_id = get_global_id(0);
int loop_cnt = 1<<16;
/* Upper 32 bits come from g_msw, lower 32 bits are added from g_lsw. */
unsigned long first = g_msw[g_work_id];
first = (first<<32)+g_lsw[g_work_id];
/* Inclusive end value: first + 65535 (unsigned arithmetic, wraps modulo 2^64). */
unsigned long last = first+loop_cnt-1;
/* 64-bit counter iterating directly over [first, last]. */
for(unsigned long k=first; k<=last; k++)
{
/* Intentionally empty inner loop. */
for(int n=0; n<8; n++)
{
}
}
}
Error message:
LLVM ERROR: Cannot select: 0x7f431808a1b0: i32 = setcc 0x7f43181074a0, 0x7f43180891a0, 0x7f4318089fb0 [ORD=14] [ID=29]
0x7f43181074a0: i64 = add 0x7f43180891a0, 0x7f43180894a0 [ORD=11] [ID=26]
This kernel compiles.
/*
 * Counterpart kernel with the same setup, but the outer loop is driven by a
 * 32-bit counter instead of a 64-bit one.
 *
 * Each work-item reads one 32-bit element from g_msw and one from g_lsw at
 * its global id and combines them into a 64-bit starting value. The outer
 * loop then counts k from 0 up to loop_cnt (1<<16) as an unsigned int, and
 * the 64-bit value first + k is formed inside the loop body.
 * No results are stored and the inner loop body is empty.
 */
__kernel void No_Crash(__global unsigned int *g_msw, __global unsigned int *g_lsw)
{
size_t g_work_id = get_global_id(0);
int loop_cnt = 1<<16;
/* Upper 32 bits come from g_msw, lower 32 bits are added from g_lsw. */
unsigned long first = g_msw[g_work_id];
first = (first<<32)+g_lsw[g_work_id];
/* Computed but not referenced by the loop below. */
unsigned long last = first+loop_cnt-1;
/* 32-bit counter; loop_cnt is converted to unsigned for the comparison. */
for(unsigned int k=0; k<loop_cnt; k++)
{
/* 64-bit value for this iteration; not used further in this kernel. */
unsigned long k2 = first + k;
/* Intentionally empty inner loop. */
for(int n=0; n<8; n++)
{
}
}
}