Hi there,

Is there a way to submit a bug privately without sharing my source code with anyone?

Hi there,

Is there a way to submit a bug privately without sharing my source code with anyone?

No way. Anyway, I've managed to strip out some unnecessary code. So the question is: why did the kernel (below) stop working properly after I installed the latest drivers? Compiling it with the -O3 and -O0 options gives different results; with optimization turned off it works as expected. I ran it on a Windows 7 x64 system with a Radeon 5670, as a 32-bit application, with the latest drivers installed.

Thank you.

/*
 * s3 - fixed network of bitwise gates over six input words.
 *
 * Combines a1..a6 using only AND, OR, XOR and NOT into four result words
 * and XORs each of them into the word pointed to by out1..out4 (the
 * targets are updated in place, not overwritten). Every operation is
 * bitwise, so each bit position of the inputs is processed independently.
 *
 * Parameters:
 *   a1..a6      input words (read only).
 *   out1..out4  words to be XOR-updated; each must point to a valid uint.
 *
 * The outputs are produced in the order out4, out3, out2, out1, and later
 * stages reuse temporaries from earlier ones (e.g. x15 feeds x45).
 *
 * NOTE(review): the name and shape suggest a bitsliced DES S-box gate
 * network - confirm against the original project before relying on that.
 * NOTE(review): this code was posted as a reproducer for a result that
 * differs between optimization levels; keep the statements as they are.
 */
void s3( uint const a1,

uint const a2,

uint const a3,

uint const a4,

uint const a5,

uint const a6,

uint *out1,

uint *out2,

uint *out3,

uint *out4 )

{

uint x1, x2, x3, x4, x5, x6, x7, x8;

uint x9, x10, x11, x12, x13, x14, x15, x16;

uint x17, x18, x19, x20, x21, x22, x23, x24;

uint x25, x26, x27, x28, x29, x30, x31, x32;

uint x33, x34, x35, x36, x37, x38, x39, x40;

uint x41, x42, x43, x44, x45, x46, x47, x48;

uint x49, x50, x51, x52, x53;

/* Stage 1: x1..x15, result x15 is folded into *out4. */
x1 = a2 ^ a3;

x2 = x1 ^ a6;

x3 = a2 & x2;

x4 = a5 | x3;

x5 = x2 ^ x4;

x6 = a3 ^ x3;

x7 = x6 & ~a5;

x8 = a1 | x7;

x9 = x5 ^ x8;

x10 = a6 & ~x3;

x11 = x10 ^ a5;

x12 = a1 & x11;

x13 = a5 ^ x12;

x14 = a4 | x13;

x15 = x9 ^ x14;

*out4 ^= x15;

/* Stage 2: x16..x30, result x30 is folded into *out3. */
x16 = a3 & a6;

x17 = x16 | x3;

x18 = x17 ^ a5;

x19 = x2 & ~x7;

x20 = x19 ^ x16;

x21 = a1 | x20;

x22 = x18 ^ x21;

x23 = a2 | x7;

x24 = x23 ^ x4;

x25 = x11 | x19;

x26 = x25 ^ x17;

x27 = a1 | x26;

x28 = x24 ^ x27;

x29 = a4 & ~x28;

/* The complement is applied to x29 before the XOR (i.e. an XNOR with x22). */
x30 = x22 ^ ~x29;

*out3 ^= x30;

/* Stage 3: x31..x42, result x42 is folded into *out2. */
x31 = a3 & a5;

x32 = x31 ^ x2;

x33 = x7 & ~a3;

x34 = a1 | x33;

x35 = x32 ^ x34;

x36 = x10 | x26;

x37 = a6 ^ x17;

x38 = x37 & ~x5;

x39 = a1 & x38;

x40 = x36 ^ x39;

x41 = a4 & x40;

x42 = x35 ^ x41;

*out2 ^= x42;

/* Stage 4: x43..x53, result x53 is folded into *out1. Reuses x15 and x42. */
x43 = a2 | x19;

x44 = x43 ^ x18;

x45 = a6 & x15;

x46 = x45 ^ x6;

x47 = x46 & ~a1;

x48 = x44 ^ x47;

x49 = x42 & ~x23;

x50 = a1 | x49;

x51 = x47 ^ x50;

x52 = a4 & x51;

/* As with x30, the complement is applied to x52 before the XOR. */
x53 = x48 ^ ~x52;

*out1 ^= x53;

}

/*
 * s - run one s3() step on constant state and scatter the state into k[].
 *
 * Sets up 64 local words p01..p64, each either all-zero or all-one, calls
 * s3() once with inputs taken from k[] (four of them XORed with p41, p42,
 * p43 and p45), and lets s3() XOR-update p24, p16, p30 and p06 in place.
 * Afterwards every p word is stored into k[] exactly once in a fixed
 * order: p01..p32 land on the even indices of k, p33..p64 on the odd ones.
 *
 * Parameters:
 *   k  in/out array; indices 0..63 are written, so it must hold at least
 *      64 words. k[2], k[21], k[22], k[30], k[38] and k[50] are read for
 *      the s3() call before any element is overwritten.
 *
 * Apart from p24, p16, p30 and p06, all values stored into k[] are the
 * constants below and do not depend on the incoming contents of k[].
 *
 * NOTE(review): the author states this was cut down from larger code to
 * reproduce an optimization-level-dependent result; keep statements as is.
 */
void s(uint *k )

{

uint p01 = 0x00000000;

uint p02 = 0x00000000;

uint p03 = 0x00000000;

uint p04 = 0xffffffff;

uint p05 = 0x00000000;

uint p06 = 0xffffffff;

uint p07 = 0xffffffff;

uint p08 = 0xffffffff;

uint p09 = 0x00000000;

uint p10 = 0x00000000;

uint p11 = 0x00000000;

uint p12 = 0x00000000;

uint p13 = 0x00000000;

uint p14 = 0xffffffff;

uint p15 = 0x00000000;

uint p16 = 0x00000000;

uint p17 = 0xffffffff;

uint p18 = 0xffffffff;

uint p19 = 0x00000000;

uint p20 = 0x00000000;

uint p21 = 0x00000000;

uint p22 = 0x00000000;

uint p23 = 0xffffffff;

uint p24 = 0x00000000;

uint p25 = 0xffffffff;

uint p26 = 0x00000000;

uint p27 = 0xffffffff;

uint p28 = 0x00000000;

uint p29 = 0xffffffff;

uint p30 = 0xffffffff;

uint p31 = 0xffffffff;

uint p32 = 0xffffffff;

uint p33 = 0x00000000;

uint p34 = 0x00000000;

uint p35 = 0x00000000;

uint p36 = 0x00000000;

uint p37 = 0x00000000;

uint p38 = 0x00000000;

uint p39 = 0x00000000;

uint p40 = 0x00000000;

uint p41 = 0xffffffff;

uint p42 = 0xffffffff;

uint p43 = 0xffffffff;

uint p44 = 0x00000000;

uint p45 = 0xffffffff;

uint p46 = 0x00000000;

uint p47 = 0x00000000;

uint p48 = 0x00000000;

uint p49 = 0x00000000;

uint p50 = 0x00000000;

uint p51 = 0x00000000;

uint p52 = 0x00000000;

uint p53 = 0x00000000;

uint p54 = 0x00000000;

uint p55 = 0x00000000;

uint p56 = 0xffffffff;

uint p57 = 0x00000000;

uint p58 = 0x00000000;

uint p59 = 0xffffffff;

uint p60 = 0x00000000;

uint p61 = 0x00000000;

uint p62 = 0xffffffff;

uint p63 = 0xffffffff;

uint p64 = 0xffffffff;

/*
 * Single gate-network step. p41, p42, p43 and p45 are all-ones, so the
 * XOR complements k[30], k[22], k[21] and k[50]; k[2] and k[38] are passed
 * unchanged. s3() XORs its results into p24, p16, p30 and p06.
 */
s3( k[ 2], p41 ^ k[30], p42 ^ k[22], p43 ^ k[21], k[38], p45 ^ k[50], &p24, &p16, &p30, &p06 );

/* Scatter p01..p32 onto the even indices of k. */
k[58] = p09;

k[60] = p17;

k[12] = p23;

k[14] = p31;

k[26] = p13;

k[38] = p28;

k[48] = p02;

k[52] = p18;

k[ 4] = p24;

k[ 2] = p16;

k[22] = p30;

k[16] = p06;

k[54] = p26;

k[36] = p20;

k[50] = p10;

k[56] = p01;

k[ 0] = p08;

k[18] = p14;

k[62] = p25;

k[40] = p03;

k[32] = p04;

k[30] = p29;

k[42] = p11;

k[44] = p19;

k[ 6] = p32;

k[34] = p12;

k[20] = p22;

k[ 8] = p07;

k[24] = p05;

k[46] = p27;

k[10] = p15;

k[28] = p21;

/* Scatter p33..p64 onto the odd indices of k. */
k[59] = p41;

k[61] = p49;

k[13] = p55;

k[15] = p63;

k[27] = p45;

k[39] = p60;

k[49] = p34;

k[53] = p50;

k[ 5] = p56;

k[ 3] = p48;

k[23] = p62;

k[17] = p38;

k[55] = p58;

k[37] = p52;

k[51] = p42;

k[57] = p33;

k[ 1] = p40;

k[19] = p46;

k[63] = p57;

k[41] = p35;

k[33] = p36;

k[31] = p61;

k[43] = p43;

k[45] = p51;

k[ 7] = p64;

k[35] = p44;

k[21] = p54;

k[ 9] = p39;

k[25] = p37;

k[47] = p59;

k[11] = p47;

k[29] = p53;

}

/*
 * kern - kernel entry point of the reproducer.
 *
 * Every work-item fills a function-local 64-word array with a fixed
 * pattern, runs s() on it, and the work-item whose get_global_id(0) is 0
 * copies the resulting 64 words to pResult and sets pFlag[0] to 1.
 *
 * Parameters:
 *   pResult  output buffer; elements 0..63 are written by work-item 0.
 *   pFlag    output buffer; element 0 is set to 1 by work-item 0.
 *
 * The kernel takes no input data, so the contents written to pResult are
 * determined entirely by the constants here and in s().
 * The attributes below require a work-group size of 64x1x1.
 */
__kernel

__attribute__((vec_type_hint(uint)))

__attribute__((reqd_work_group_size(64, 1, 1)))

void kern(__global unsigned int *pResult, __global unsigned int *pFlag)

{

uint p32[64];

/*
 * Lower half (0..31): even slots 0xA5A5A5A5, odd slots 0x5C5C5C5C.
 * Upper half (32..63): the same two constants with the roles swapped.
 */
for ( int i=0; i<32; i+=2 )

{

p32[i]=0xA5A5A5A5;

p32[i+1]=0x5C5C5C5C;

p32[i+32]=0x5C5C5C5C;

p32[i+32+1]=0xA5A5A5A5;

}

s(p32);

/* Only one work-item publishes the result; the others discard theirs. */
if ( get_global_id(0)==0 )

{

for ( int i=0; i<64; i++ )

pResult[i]=p32[i];

pFlag[0]=1;

}

return;

}

To reproduce the bug, I've created a simple project that writes two binary output files (one for the -O0 option and another for -O3). Download it here

Thanks for your patience.

The team was able to reproduce the issue with an older driver, but the issue no longer occurs with the latest driver.

With the latest driver, optimization levels 0 and 3 produce the same result. Can you verify this on your end and confirm?