11 Replies Latest reply on Oct 6, 2014 1:23 AM by dipak

    Bug submission

    ivan

      Hi there,

       

      Is there a way to submit a bug report privately, without sharing my source code with anyone?

        • Re: Bug submission
          ivan

          No way. Anyway, I've managed to strip out some of the unnecessary code. So the question is: why did the kernel (below) stop working properly after I installed the latest drivers? Compiling it with the -O3 and -O0 options gives different results; with optimization turned off it works as expected. I ran it as a 32-bit application on a 64-bit Windows 7 system with a Radeon 5670 and the latest drivers installed.

          Thank you.

           

           

          /*
           * s3 - evaluate a fixed network of bitwise gates on six input words and
           * XOR the four results into the caller's output words.
           *
           * Only &, |, ^ and ~ are used, so each bit position of the words is
           * processed independently of the others.
           * NOTE(review): the name and the 6-in/4-out shape suggest a bitsliced
           * DES S-box 3; nothing in this file states that - confirm before
           * relying on it.
           *
           * a1..a6      input words, passed by value and never modified.
           * out1..out4  in/out words: each is read and XORed with its result
           *             (x53, x42, x30 and x15 respectively), not overwritten.
           *
           * Results are produced in the order out4, out3, out2, out1. Later
           * sections reuse intermediates computed by earlier ones (for example
           * x15 feeds x45, and x42 feeds x49).
           */
          void s3( uint const a1,

            uint const a2,

            uint const a3,

            uint const a4,

            uint const a5,

            uint const a6,

            uint *out1,

            uint *out2,

            uint *out3,

            uint *out4 )

          {

            uint x1, x2, x3, x4, x5, x6, x7, x8;

            uint x9, x10, x11, x12, x13, x14, x15, x16;

            uint x17, x18, x19, x20, x21, x22, x23, x24;

            uint x25, x26, x27, x28, x29, x30, x31, x32;

            uint x33, x34, x35, x36, x37, x38, x39, x40;

            uint x41, x42, x43, x44, x45, x46, x47, x48;

            uint x49, x50, x51, x52, x53;

           

           

            /* Gates x1..x15: x15 is the value XORed into *out4. */
            x1 = a2 ^ a3;

            x2 = x1 ^ a6;

            x3 = a2 & x2;

            x4 = a5 | x3;

            x5 = x2 ^ x4;

            x6 = a3 ^ x3;

            x7 = x6 & ~a5;

            x8 = a1 | x7;

            x9 = x5 ^ x8;

            x10 = a6 & ~x3;

            x11 = x10 ^ a5;

            x12 = a1 & x11;

            x13 = a5 ^ x12;

            x14 = a4 | x13;

            x15 = x9 ^ x14;

           

           

            *out4 ^= x15;

           

           

            /* Gates x16..x30: x30 is the value XORed into *out3. */
            x16 = a3 & a6;

            x17 = x16 | x3;

            x18 = x17 ^ a5;

            x19 = x2 & ~x7;

            x20 = x19 ^ x16;

            x21 = a1 | x20;

            x22 = x18 ^ x21;

            x23 = a2 | x7;

            x24 = x23 ^ x4;

            x25 = x11 | x19;

            x26 = x25 ^ x17;

            x27 = a1 | x26;

            x28 = x24 ^ x27;

            x29 = a4 & ~x28;

            x30 = x22 ^ ~x29;

           

           

            *out3 ^= x30;

           

           

            /* Gates x31..x42: x42 is the value XORed into *out2. */
            x31 = a3 & a5;

            x32 = x31 ^ x2;

            x33 = x7 & ~a3;

            x34 = a1 | x33;

            x35 = x32 ^ x34;

            x36 = x10 | x26;

            x37 = a6 ^ x17;

            x38 = x37 & ~x5;

            x39 = a1 & x38;

            x40 = x36 ^ x39;

            x41 = a4 & x40;

            x42 = x35 ^ x41;

           

           

            *out2 ^= x42;

           

           

            /* Gates x43..x53: x53 is the value XORed into *out1. */
            x43 = a2 | x19;

            x44 = x43 ^ x18;

            x45 = a6 & x15;

            x46 = x45 ^ x6;

            x47 = x46 & ~a1;

            x48 = x44 ^ x47;

            x49 = x42 & ~x23;

            x50 = a1 | x49;

            x51 = x47 ^ x50;

            x52 = a4 & x51;

            x53 = x48 ^ ~x52;

           

           

            *out1 ^= x53;

          }

           

           

           

           

          /*
           * s - run one s3() evaluation on constant data mixed with six words of
           * k, then overwrite all 64 words of k with a fixed arrangement of the
           * p01..p64 locals.
           *
           * k  in/out array; indices 0..63 are accessed, so it must hold at least
           *    64 words. k[2], k[30], k[22], k[21], k[38] and k[50] are read for
           *    the s3() call before anything is written.
           *
           * p01..p64 are locals initialised to either 0x00000000 or 0xffffffff.
           * Only p24, p16, p30 and p06 change afterwards (s3() XORs its out1..out4
           * results into them), so on return only k[4], k[2], k[22] and k[16]
           * depend on the input; every other element receives a constant.
           */
          void s(uint *k )

          {

            uint p01 = 0x00000000;

            uint p02 = 0x00000000;

            uint p03 = 0x00000000;

            uint p04 = 0xffffffff;

            uint p05 = 0x00000000;

            uint p06 = 0xffffffff;

            uint p07 = 0xffffffff;

            uint p08 = 0xffffffff;

            uint p09 = 0x00000000;

            uint p10 = 0x00000000;

            uint p11 = 0x00000000;

            uint p12 = 0x00000000;

            uint p13 = 0x00000000;

            uint p14 = 0xffffffff;

            uint p15 = 0x00000000;

            uint p16 = 0x00000000;

            uint p17 = 0xffffffff;

            uint p18 = 0xffffffff;

            uint p19 = 0x00000000;

            uint p20 = 0x00000000;

            uint p21 = 0x00000000;

            uint p22 = 0x00000000;

            uint p23 = 0xffffffff;

            uint p24 = 0x00000000;

            uint p25 = 0xffffffff;

            uint p26 = 0x00000000;

            uint p27 = 0xffffffff;

            uint p28 = 0x00000000;

            uint p29 = 0xffffffff;

            uint p30 = 0xffffffff;

            uint p31 = 0xffffffff;

            uint p32 = 0xffffffff;

            uint p33 = 0x00000000;

            uint p34 = 0x00000000;

            uint p35 = 0x00000000;

            uint p36 = 0x00000000;

            uint p37 = 0x00000000;

            uint p38 = 0x00000000;

            uint p39 = 0x00000000;

            uint p40 = 0x00000000;

            uint p41 = 0xffffffff;

            uint p42 = 0xffffffff;

            uint p43 = 0xffffffff;

            uint p44 = 0x00000000;

            uint p45 = 0xffffffff;

            uint p46 = 0x00000000;

            uint p47 = 0x00000000;

            uint p48 = 0x00000000;

            uint p49 = 0x00000000;

            uint p50 = 0x00000000;

            uint p51 = 0x00000000;

            uint p52 = 0x00000000;

            uint p53 = 0x00000000;

            uint p54 = 0x00000000;

            uint p55 = 0x00000000;

            uint p56 = 0xffffffff;

            uint p57 = 0x00000000;

            uint p58 = 0x00000000;

            uint p59 = 0xffffffff;

            uint p60 = 0x00000000;

            uint p61 = 0x00000000;

            uint p62 = 0xffffffff;

            uint p63 = 0xffffffff;

            uint p64 = 0xffffffff;

           

           

            /*
             * a1 and a5 take k[2] and k[38] directly; the other four inputs are
             * XORed with p41, p42, p43 and p45. The results are XOR-accumulated
             * into p24 (out1), p16 (out2), p30 (out3) and p06 (out4).
             */
            s3( k[ 2], p41 ^ k[30], p42 ^ k[22], p43 ^ k[21], k[38], p45 ^ k[50],  &p24,  &p16,  &p30,  &p06 );

           

           

            /* Even indices of k are filled from p01..p32. */
            k[58] = p09;

            k[60] = p17;

            k[12] = p23;

            k[14] = p31;

            k[26] = p13;

            k[38] = p28;

            k[48] = p02;

            k[52] = p18;

            k[ 4] = p24;

            k[ 2] = p16;

            k[22] = p30;

            k[16] = p06;

            k[54] = p26;

            k[36] = p20;

            k[50] = p10;

            k[56] = p01;

            k[ 0] = p08;

            k[18] = p14;

            k[62] = p25;

            k[40] = p03;

            k[32] = p04;

            k[30] = p29;

            k[42] = p11;

            k[44] = p19;

            k[ 6] = p32;

            k[34] = p12;

            k[20] = p22;

            k[ 8] = p07;

            k[24] = p05;

            k[46] = p27;

            k[10] = p15;

            k[28] = p21;

           

           

            /*
             * Odd indices of k are filled from p33..p64, in the same order as
             * above: where k[j] received pNN, k[j+1] receives p(NN+32).
             */
            k[59] = p41;

            k[61] = p49;

            k[13] = p55;

            k[15] = p63;

            k[27] = p45;

            k[39] = p60;

            k[49] = p34;

            k[53] = p50;

            k[ 5] = p56;

            k[ 3] = p48;

            k[23] = p62;

            k[17] = p38;

            k[55] = p58;

            k[37] = p52;

            k[51] = p42;

            k[57] = p33;

            k[ 1] = p40;

            k[19] = p46;

            k[63] = p57;

            k[41] = p35;

            k[33] = p36;

            k[31] = p61;

            k[43] = p43;

            k[45] = p51;

            k[ 7] = p64;

            k[35] = p44;

            k[21] = p54;

            k[ 9] = p39;

            k[25] = p37;

            k[47] = p59;

            k[11] = p47;

            k[29] = p53;

          }

           

           

          /*
           * kern - build a 64-word test pattern, pass it through s(), and have one
           * work-item publish the result.
           *
           * pResult  output buffer; elements 0..63 are written.
           * pFlag    output buffer; element 0 is set to 1 after pResult is filled.
           *
           * Every work-item performs the same computation on its function-local
           * array p32; only the work-item whose get_global_id(0) is 0 writes to
           * the two buffers. The attributes below require a work-group size of
           * 64x1x1 and give the compiler a uint vector-type hint.
           */
          __kernel

          __attribute__((vec_type_hint(uint)))

          __attribute__((reqd_work_group_size(64, 1, 1)))

          void kern(__global unsigned int *pResult, __global unsigned int *pFlag)

          {

            uint p32[64];

           

           

            /*
             * Fill all 64 words: p32[0..31] alternates 0xA5A5A5A5, 0x5C5C5C5C
             * starting at index 0; p32[32..63] alternates the opposite way.
             */
            for ( int i=0; i<32; i+=2 )

            {

            p32[i]=0xA5A5A5A5;

            p32[i+1]=0x5C5C5C5C;

            p32[i+32]=0x5C5C5C5C;

            p32[i+32+1]=0xA5A5A5A5;

            }

           

           

            s(p32);

           

           

            if ( get_global_id(0)==0 )

            {

            /*
             * The inner for has no braces: only the copy is its body, and
             * pFlag[0] is set once after the loop finishes.
             */
            for ( int i=0; i<64; i++ )

            pResult[i]=p32[i];

            pFlag[0]=1;

            }

            return;

          }