cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

henry931
Journeyman III

Kernel Compilation "LLVM ERROR"

Here is the OpenCL (I've marked the statements that seem to cause the issue - lines 8 and 21):

(If I were to change tempint on those lines to any literal uint the kernel compiles fine - madness)


/*
 * Adds two 4-limb unsigned integers limb by limb.
 *
 * Each operand is an array of four 32-bit limbs; the carry is propagated
 * from index 0 upwards, so index 0 is the least significant limb.
 *
 *   res  receives the four limbs of the sum (a + b, truncated to 4 limbs)
 *   a    first operand, 4 limbs
 *   b    second operand, 4 limbs
 *
 * Returns the carry out of the top limb (0 or 1).
 */
uint wide_add_vector(uint* res, const uint* a, const uint* b)
{
  ulong carry=0;

  #pragma unroll
  for(uint i=0;i<4;i++){
    /* Widen before adding so the 33-bit intermediate result is not lost. */
    ulong tmp=(ulong)(a[i])+b[i]+carry;
    uint tempint = (uint)(tmp&0xFFFFFFFF);

    res[i] = tempint;
    carry=tmp>>32;
  }

  /* carry is at most 1 here, so narrowing to uint loses nothing. */
  return (uint)carry;
}




/*
 * Adds a single 32-bit value to a 4-limb unsigned integer.
 *
 * The scalar is injected as the initial carry and rippled upwards from
 * index 0, so index 0 is the least significant limb.
 *
 *   res  receives the four limbs of the sum (a + b, truncated to 4 limbs)
 *   a    multi-limb operand, 4 limbs
 *   b    32-bit value to add
 *
 * Returns the carry out of the top limb (0 or 1).
 */
uint wide_add_scalar(uint* res, const uint* a, uint b)
{
  ulong carry=b;

  #pragma unroll
  for(uint i=0;i<4;i++){
    /* a[i] is promoted to ulong by the addition with carry. */
    ulong tmp=a[i]+carry;
    uint tempint = (uint)(tmp&0xFFFFFFFF);

    res[i] = tempint;
    carry=tmp>>32;
  }

  /* After the first limb the carry is at most 1, so the cast is lossless. */
  return (uint)carry;
}




/*
 * Multiplies two 4-limb unsigned integers, producing an 8-limb product.
 *
 * Operands are arrays of four 32-bit limbs with index 0 least significant.
 * The product is formed one output column at a time: every partial product
 * a[j]*b[k] with j+k equal to the column number is summed into a wide
 * accumulator, the low 32 bits are emitted, and the accumulator is shifted
 * down by one limb for the next column.
 *
 *   res_hi  receives limbs 4..7 of the product
 *   res_lo  receives limbs 0..3 of the product
 *   a, b    the two 4-limb operands
 */
void wide_mul(uint* res_hi, uint* res_lo, const uint* a, const uint* b)
{
  /* acc holds the low 64 bits of the running column sum; carry counts how
     many times acc has wrapped, i.e. it is the part above bit 63. */
  ulong carry=0, acc=0;

  /* Columns 0..3: pairs (j, i-j) with j = 0..i. */
  #pragma unroll
  for(uint i=0; i<4; i++){
    #pragma unroll
    for(uint j=0; j<=i; j++){
      ulong tmp=(ulong)(a[j])*b[i-j];
      acc+=tmp;
      /* Unsigned wrap-around leaves acc smaller than the addend. */
      carry+=(acc < tmp);
    }
    res_lo[i]=(uint)(acc&0xFFFFFFFF);
    /* Shift the (carry:acc) pair right by one 32-bit limb. */
    acc= (carry<<32) | (acc>>32);
    carry=carry>>32;
  }

  /* Columns 4..6: pairs (j, 3-j+i) with j = i..3, so j+k == 3+i. */
  #pragma unroll
  for(uint i=1; i<4; i++){
    #pragma unroll
    for(uint j=i; j<4; j++){
      ulong tmp=(ulong)(a[j])*b[4-j+i-1];
      acc+=tmp;
      carry+=(acc < tmp);
    }
    res_hi[i-1]=(uint)(acc&0xFFFFFFFF);
    acc= (carry<<32) | (acc>>32);
    carry=carry>>32;
  }

  /* What is left in the accumulator is the top limb of the product. */
  res_hi[3]=(uint)acc;
}




/*
 * Copies eight 32-bit words from a into the __global buffer res.
 *
 *   res  destination, at least 8 words, in the __global address space
 *   a    source, at least 8 words
 */
void wide_copy_global(__global uint *res, const uint *a)
{
  #pragma unroll
  for(uint i=0;i<8;i++){
    /* Element-wise store; assigning the pointers themselves copies nothing. */
    res[i]=a[i];
  }
}




/*
 * Kernel entry point: each work-item builds an 8-word state seeded with its
 * own global id and the round parameters, runs hashSteps mixing iterations
 * on it, and writes the final 8 words to its own slot in proofBuffer.
 *
 *   roundId, roundSalt, chainHash  64-bit round parameters
 *   c                              four 32-bit words used as the multiplier
 *   hashSteps                      number of mixing iterations to run
 *   proofBuffer                    output; work-item w writes words
 *                                  [8*w, 8*w + 8)
 */
__kernel void bitecoin_miner(ulong roundId,ulong roundSalt,ulong chainHash, uint4 c, uint hashSteps, __global uint* proofBuffer)
{
    const uint gid = get_global_id(0);

    /* Unpack the vector argument into a plain array for the limb helpers. */
    uint multiplier[4];
    multiplier[0] = c.x;
    multiplier[1] = c.y;
    multiplier[2] = c.z;
    multiplier[3] = c.w;

    /* NOTE(review): each 64-bit parameter contributes its low 32 bits
       twice; confirm whether the second word of each pair was meant to be
       the high half. */
    uint state[8] = {
        gid, 0,
        (uint)roundId, (uint)roundId,
        (uint)roundSalt, (uint)roundSalt,
        (uint)chainHash, (uint)chainHash
    };

    uint step = 0;
    while (step < hashSteps) {
        uint product[8];

        /* product = state[0..3] * multiplier (low half first, then high). */
        wide_mul(product + 4, product, state, multiplier);

        /* state[0..3] = product[0..3] + state[4..7], keeping the carry... */
        uint carryOut = wide_add_vector(state, product, state + 4);

        /* ...which is folded into state[4..7] = product[4..7] + carry. */
        wide_add_scalar(state + 4, product + 4, carryOut);

        step++;
    }

    wide_copy_global(proofBuffer + 8 * gid, state);
}













When run I get:


LogLevel = 2 -> 2


[MyClient], 1395075385.62, 2, Created log.


Will try to connect to address Minty at port 4000


Found 1 platforms


  Platform 0 : Advanced Micro Devices, Inc.


Choosing platform 0


Found 2 devices


  Device 0 : Tahiti


  Device 1 : Intel(R) Core(TM) i7-4770K CPU @ 3.50GHz


Choosing device 0


LLVM ERROR: Cannot select: 0x855acbc3a0: i32 = setcc 0x855acbcca0, 0x855ac3a080, 0x855ac3a480 [ORD=52] [ID=30]


  0x855acbcca0: i64 = add 0x855ac3a080, 0x855ac3aa80 [ORD=49] [ID=28]


    0x855ac3a080: i64,ch = CopyFromReg 0x855ac2b1d0, 0x855ac3a680 [ORD=49] [ID=19]


      0x855ac3a680: i64 = Register %vreg33 [ORD=49] [ID=7]


    0x855ac3aa80: i64 = mul 0x855acbcda0, 0x855ac37450 [ORD=48] [ID=27]


      0x855acbcda0: i64,ch = load 0x855ac2b1d0, 0x855ac37250, 0x855ac3a380<LD4[%scevgep106], zext from i32> [ORD=47] [ID=26]


        0x855ac37250: i32 = add 0x855ac36640, 0x855ac38960 [ORD=45] [ID=25]


          0x855ac36640: i32 = sub 0x855ac37850, 0x855ac37050 [ORD=44] [ID=24]


            0x855ac37850: i32 = FrameIndex<0> [ORD=41] [ID=1]


            0x855ac37050: i32 = shl 0x855acbbc90, 0x855ac3a980 [ORD=44] [ID=23]


              0x855acbbc90: i32,ch = CopyFromReg 0x855ac2b1d0, 0x855ac36940 [ORD=43] [ID=18]


                0x855ac36940: i32 = Register %vreg30 [ORD=43] [ID=3]


              0x855ac3a980: i32 = Constant<2> [ORD=44] [ID=4]


          0x855ac38960: i32 = Constant<8> [ORD=45] [ID=5]


        0x855ac3a380: i32 = undef [ORD=46] [ID=6]


      0x855ac37450: i64 = zero_extend 0x855acbbd90 [ORD=42] [ID=21]


        0x855acbbd90: i32,ch = CopyFromReg 0x855ac2b1d0, 0x855acbba90 [ORD=42] [ID=17]


          0x855acbba90: i32 = Register %vreg31 [ORD=42] [ID=2]


  0x855ac3a080: i64,ch = CopyFromReg 0x855ac2b1d0, 0x855ac3a680 [ORD=49] [ID=19]


    0x855ac3a680: i64 = Register %vreg33 [ORD=49] [ID=7]


In function: __OpenCL_bitecoin_miner_kernel


Press any key to continue . . .


If I put it into Kernel Analyzer it just freezes.

Any ideas?

The system is:

Windows 8.1 64-bit, Visual Studio 2013

HD7970 Driver Version 13.350.1005.0

Catalyst 14.2

AMD APP SDK 2.9

Many Thanks

Henry

0 Likes
1 Solution

Hi henry931,

Your issue is not clear from your post. Could you please share your host code with us so that we can reproduce the issue here and keep you updated?

Thanks,

AMD_Support

View solution in original post

0 Likes
2 Replies

Hi henry931,

Your issue is not clear from your post. Could you please share your host code with us so that we can reproduce the issue here and keep you updated?

Thanks,

AMD_Support

0 Likes

Sorry for the late reply. Anyway, I got it to compile (and work) by disabling optimisations.

This was for a piece of coursework which we have since completed so it doesn't matter to me anymore...

but if you want the whole source it is on this branch:

https://github.com/henry931/HPCE-CW6/tree/Windows-OpenCL

0 Likes