Answered (Assumed Answered)

Kernel Compilation "LLVM ERROR"

Question asked by henry931 on Mar 17, 2014
Latest reply on Mar 25, 2014 by henry931

Here is the OpenCL code (I've marked the statements that seem to cause the issue with "Problem statement" comments - lines 8 and 21):

(If I were to change tempint on those lines to any literal uint the kernel compiles fine - madness)

/*
 * Adds two 4-limb integers limb by limb: res = a + b.
 *
 * Each limb is 32 bits wide and limb 0 is the least significant one (the
 * carry is propagated from index 0 upwards).
 *
 * res - receives the 4 result limbs
 * a,b - the two 4-limb operands
 *
 * Returns the carry out of the most significant limb.
 */
uint wide_add_vector(uint* res, const uint* a, const uint* b)
{
  ulong carry = 0;
  #pragma unroll
  for (uint limb = 0; limb < 4; ++limb) {
    // Sum in 64 bits so the carry out of this limb ends up in bit 32.
    const ulong sum = (ulong)a[limb] + (ulong)b[limb] + carry;
    uint tempint = (uint)sum;
    res[limb] = tempint; // <---- Problem statement
    carry = sum >> 32;
  }
  return (uint)carry;
}


/*
 * Adds a 32-bit scalar to a 4-limb integer: res = a + b.
 *
 * Each limb is 32 bits wide and limb 0 is the least significant one. The
 * scalar is injected as the initial carry, so it is added to limb 0 and
 * any overflow ripples into the higher limbs.
 *
 * res - receives the 4 result limbs
 * a   - the 4-limb operand
 * b   - the scalar to add
 *
 * Returns the carry out of the most significant limb.
 */
uint wide_add_scalar(uint* res, const uint* a, uint b)
{
  ulong carry = b;
  #pragma unroll
  for (uint limb = 0; limb < 4; ++limb) {
    // Sum in 64 bits so the carry out of this limb ends up in bit 32.
    const ulong sum = (ulong)a[limb] + carry;
    uint tempint = (uint)sum;
    res[limb] = tempint; // <---- Problem statement
    carry = sum >> 32;
  }
  return (uint)carry;
}


/*
 * Multiplies two 4-limb integers into an 8-limb product.
 *
 * Limbs are 32 bits wide, limb 0 least significant. The product is built
 * one output column at a time: column k is the sum of every a[j] * b[k-j]
 * with both indices in 0..3, plus whatever was carried in from column k-1.
 *
 * res_hi - receives product limbs 4..7
 * res_lo - receives product limbs 0..3
 * a, b   - the two 4-limb operands
 */
void wide_mul(uint* res_hi, uint* res_lo, const uint* a, const uint* b)
{
  ulong acc = 0;       // low 64 bits of the running column sum
  ulong overflow = 0;  // how many times acc has wrapped past 2^64

  // Columns 0..3: term index j runs from 0 up to the column number.
  #pragma unroll
  for (uint col = 0; col < 4; ++col) {
    #pragma unroll
    for (uint j = 0; j <= col; ++j) {
      const ulong prod = (ulong)a[j] * b[col - j];
      acc += prod;
      // An unsigned sum smaller than the addend means the add wrapped.
      if (acc < prod) {
        overflow += 1;
      }
    }
    res_lo[col] = (uint)acc;
    // Shift the (overflow:acc) pair right by one limb for the next column.
    acc = (overflow << 32) | (acc >> 32);
    overflow >>= 32;
  }

  // Columns 4..6: term index j runs from (column - 3) up to 3.
  #pragma unroll
  for (uint col = 4; col < 7; ++col) {
    #pragma unroll
    for (uint j = col - 3; j < 4; ++j) {
      const ulong prod = (ulong)a[j] * b[col - j];
      acc += prod;
      if (acc < prod) {
        overflow += 1;
      }
    }
    res_hi[col - 4] = (uint)acc;
    acc = (overflow << 32) | (acc >> 32);
    overflow >>= 32;
  }

  // Column 7 has no products of its own; it is just the final carry.
  res_hi[3] = (uint)acc;
}


/*
 * Copies 8 consecutive uint values from a into the __global buffer res.
 *
 * res - destination; the first 8 elements are overwritten
 * a   - source; the first 8 elements are read
 */
void wide_copy_global(__global uint *res, const uint *a)
{
  #pragma unroll
  for (uint limb = 0; limb != 8; ++limb) {
    res[limb] = a[limb];
  }
}


/*
 * Per-work-item hashing kernel.
 *
 * Each work-item builds an 8-limb (8 x 32-bit) state x, runs hashSteps
 * rounds of a multiply-and-fold step over it, and writes the final 8 limbs
 * to proofBuffer at offset 8 * get_global_id(0).
 *
 * roundId, roundSalt, chainHash - 64-bit inputs; each is stored in x as a
 *                                 (low word, high word) pair of limbs
 * c          - 4-limb multiplier, one limb per vector component (x = limb 0)
 * hashSteps  - number of rounds to run
 * proofBuffer- output; this work-item writes elements
 *              [8*id, 8*id + 7], so the buffer needs 8 uints per work-item
 */
__kernel void bitecoin_miner(ulong roundId,ulong roundSalt,ulong chainHash, uint4 c, uint hashSteps, __global uint* proofBuffer)
{
    uint workerID = get_global_id(0);

    uint cArray[4] = {c.x,c.y,c.z,c.w};

    // Initial state, least significant limb first. Every 64-bit input is
    // split into its low and high 32-bit halves, matching the
    // {workerID, 0} pair. The previous code stored the low half twice and
    // silently dropped the upper 32 bits of each argument.
    uint x[8] = {
        workerID,        0,
        (uint)roundId,   (uint)(roundId >> 32),
        (uint)roundSalt, (uint)(roundSalt >> 32),
        (uint)chainHash, (uint)(chainHash >> 32)
    };

    for(uint j=0;j<hashSteps;j++)
    {
        // Fully written by wide_mul before it is read.
        uint tmp[8];

        // tmp = lo(x) * c   (cArray; not to be confused with carry.)
        wide_mul(tmp+4, tmp, x, cArray);

        // lo(x) = lo(tmp) + hi(x), keeping the carry out.
        uint carry=wide_add_vector(x, tmp, x+4);

        // hi(x) = hi(tmp) + carry
        wide_add_scalar(x+4, tmp+4, carry);
    }

    wide_copy_global(proofBuffer+8*workerID,x);
}











 

When run I get:

LogLevel = 2 -> 2
[MyClient], 1395075385.62, 2, Created log.
Will try to connect to address Minty at port 4000
Found 1 platforms
  Platform 0 : Advanced Micro Devices, Inc.
Choosing platform 0
Found 2 devices
  Device 0 : Tahiti
  Device 1 : Intel(R) Core(TM) i7-4770K CPU @ 3.50GHz
Choosing device 0
LLVM ERROR: Cannot select: 0x855acbc3a0: i32 = setcc 0x855acbcca0, 0x855ac3a080, 0x855ac3a480 [ORD=52] [ID=30]
  0x855acbcca0: i64 = add 0x855ac3a080, 0x855ac3aa80 [ORD=49] [ID=28]
    0x855ac3a080: i64,ch = CopyFromReg 0x855ac2b1d0, 0x855ac3a680 [ORD=49] [ID=19]
      0x855ac3a680: i64 = Register %vreg33 [ORD=49] [ID=7]
    0x855ac3aa80: i64 = mul 0x855acbcda0, 0x855ac37450 [ORD=48] [ID=27]
      0x855acbcda0: i64,ch = load 0x855ac2b1d0, 0x855ac37250, 0x855ac3a380<LD4[%scevgep106], zext from i32> [ORD=47] [ID=26]
        0x855ac37250: i32 = add 0x855ac36640, 0x855ac38960 [ORD=45] [ID=25]
          0x855ac36640: i32 = sub 0x855ac37850, 0x855ac37050 [ORD=44] [ID=24]
            0x855ac37850: i32 = FrameIndex<0> [ORD=41] [ID=1]
            0x855ac37050: i32 = shl 0x855acbbc90, 0x855ac3a980 [ORD=44] [ID=23]
              0x855acbbc90: i32,ch = CopyFromReg 0x855ac2b1d0, 0x855ac36940 [ORD=43] [ID=18]
                0x855ac36940: i32 = Register %vreg30 [ORD=43] [ID=3]
              0x855ac3a980: i32 = Constant<2> [ORD=44] [ID=4]
          0x855ac38960: i32 = Constant<8> [ORD=45] [ID=5]
        0x855ac3a380: i32 = undef [ORD=46] [ID=6]
      0x855ac37450: i64 = zero_extend 0x855acbbd90 [ORD=42] [ID=21]
        0x855acbbd90: i32,ch = CopyFromReg 0x855ac2b1d0, 0x855acbba90 [ORD=42] [ID=17]
          0x855acbba90: i32 = Register %vreg31 [ORD=42] [ID=2]
  0x855ac3a080: i64,ch = CopyFromReg 0x855ac2b1d0, 0x855ac3a680 [ORD=49] [ID=19]
    0x855ac3a680: i64 = Register %vreg33 [ORD=49] [ID=7]
In function: __OpenCL_bitecoin_miner_kernel
Press any key to continue . . .

 

If I put it into Kernel Analyzer it just freezes.

 

Any ideas?

 

The system is:

Windows 8.1 64-bit, Visual Studio 2013

HD7970 Driver Version 13.350.1005.0

Catalyst 14.2

AMD APP SDK 2.9

 

Many Thanks

Henry

Outcomes