3 Replies Latest reply on Mar 25, 2014 12:26 PM by henry931

    Kernel Compilation "LLVM ERROR"

    henry931

      Here is the OpenCL kernel source (I've marked the statements that seem to cause the issue - lines 8 and 21 of the listing, flagged with "Problem statement" comments):

      (If I replace tempint on those lines with any literal uint, the kernel compiles fine - madness.)

      // Adds two 4-limb little-endian numbers (32 bits per limb): res = a + b.
      //   res : receives the low 4 limbs of the sum
      //   a,b : the two 4-limb operands
      // Returns the carry out of the top limb (0 or 1).
      uint wide_add_vector(uint* res, const uint* a, const uint* b)
      {
        // Low 32 bits hold the current limb of the sum; anything above is
        // the carry into the next limb.
        ulong running = 0;
        #pragma unroll
        for (uint limb = 0; limb < 4; ++limb) {
          running += a[limb];
          running += b[limb];
          res[limb] = (uint)running;   // keep only the low 32 bits
          running >>= 32;              // what is left is the carry
        }
        return (uint)running;
      }
      
      
      // Adds a 32-bit scalar to a 4-limb little-endian number: res = a + b.
      //   res : receives the low 4 limbs of the sum
      //   a   : the 4-limb operand
      //   b   : scalar added at limb 0
      // Returns the carry out of the top limb (0 or 1).
      uint wide_add_scalar(uint* res, const uint* a, uint b)
      {
        // The scalar is treated as the carry entering limb 0.
        ulong pending = b;
        #pragma unroll
        for (uint limb = 0; limb < 4; ++limb) {
          const ulong sum = pending + a[limb];
          res[limb] = (uint)sum;   // low 32 bits become the result limb
          pending = sum >> 32;     // high bits ripple into the next limb
        }
        return (uint)pending;
      }
      
      
      // Multiplies two 4-limb little-endian numbers (32 bits per limb),
      // producing the full 8-limb product by summing one output column at
      // a time.
      //   res_hi : receives product limbs 4..7
      //   res_lo : receives product limbs 0..3
      //   a, b   : the two 4-limb operands
      void wide_mul(uint* res_hi, uint* res_lo, const uint* a, const uint* b)
      {
        ulong column = 0;   // low 64 bits of the running column sum
        ulong spill = 0;    // how many times `column` wrapped past 64 bits

        // Output column k collects every a[j]*b[k-j] with both indices in
        // 0..3. Columns 0..3 go to res_lo, columns 4..6 to res_hi[0..2].
        #pragma unroll
        for (uint k = 0; k < 7; ++k) {
          const uint first = (k < 4) ? 0 : (k - 3);
          const uint last  = (k < 4) ? k : 3;

          #pragma unroll
          for (uint j = first; j <= last; ++j) {
            const ulong product = (ulong)a[j] * b[k - j];
            column += product;
            // Unsigned wrap detection: the sum ended up below an addend.
            // NOTE(review): the "Cannot select ... i64 = setcc" dump quoted
            // in this post is a compare on add(x, mul(zext load, zext)),
            // which looks like this check rather than the res[i] stores
            // in the add helpers - confirm.
            if (column < product) {
              ++spill;
            }
          }

          if (k < 4) {
            res_lo[k] = (uint)column;
          } else {
            res_hi[k - 4] = (uint)column;
          }

          // Shift the accumulator down one limb, folding the wrap count in
          // as the new upper half.
          column = (spill << 32) | (column >> 32);
          spill >>= 32;
        }

        // Whatever remains is the top limb of the product.
        res_hi[3] = (uint)column;
      }
      
      
      // Copies 8 limbs from `a` into the __global buffer `res`.
      void wide_copy_global(__global uint *res, const uint *a)
      {
        #pragma unroll
        for (uint word = 0; word != 8; ++word) {
          *(res + word) = *(a + word);
        }
      }
      
      
      // Each work-item builds an 8-limb state from its global id and the
      // round parameters, runs `hashSteps` mixing rounds on it, and writes
      // the final 8 limbs to proofBuffer at offset 8 * global id.
      //
      // One round, with the state viewed as a low half (limbs 0..3) and a
      // high half (limbs 4..7):
      //   product  = low * c            (4x4 limbs -> 8 limbs)
      //   new low  = product.low + high (carry kept)
      //   new high = product.high + carry
      __kernel void bitecoin_miner(ulong roundId,ulong roundSalt,ulong chainHash, uint4 c, uint hashSteps, __global uint* proofBuffer)
      {
          const uint gid = get_global_id(0);

          // The multiplier as a 4-limb array (not to be confused with a carry).
          const uint multiplier[4] = {c.x, c.y, c.z, c.w};

          // NOTE(review): each 64-bit argument lands in two limbs as its low
          // 32 bits both times; if the upper limb of each pair was meant to
          // hold the high half, a `>> 32` is missing - confirm intent.
          uint state[8];
          state[0] = gid;
          state[1] = 0;
          state[2] = (uint)roundId;
          state[3] = (uint)roundId;
          state[4] = (uint)roundSalt;
          state[5] = (uint)roundSalt;
          state[6] = (uint)chainHash;
          state[7] = (uint)chainHash;

          for (uint step = 0; step < hashSteps; ++step)
          {
              uint product[8];

              // product[0..3] = low limbs, product[4..7] = high limbs of
              // state[0..3] * multiplier.
              wide_mul(product + 4, product, state, multiplier);

              const uint carryOut = wide_add_vector(state, product, state + 4);

              wide_add_scalar(state + 4, product + 4, carryOut);
          }

          wide_copy_global(proofBuffer + 8 * gid, state);
      }
      
      
      
      
      
      
      
      
      
      
      
      

       

      When I run it I get:

      LogLevel = 2 -> 2
      [MyClient], 1395075385.62, 2, Created log.
      Will try to connect to address Minty at port 4000
      Found 1 platforms
        Platform 0 : Advanced Micro Devices, Inc.
      Choosing platform 0
      Found 2 devices
        Device 0 : Tahiti
        Device 1 : Intel(R) Core(TM) i7-4770K CPU @ 3.50GHz
      Choosing device 0
      LLVM ERROR: Cannot select: 0x855acbc3a0: i32 = setcc 0x855acbcca0, 0x855ac3a080, 0x855ac3a480 [ORD=52] [ID=30]
        0x855acbcca0: i64 = add 0x855ac3a080, 0x855ac3aa80 [ORD=49] [ID=28]
          0x855ac3a080: i64,ch = CopyFromReg 0x855ac2b1d0, 0x855ac3a680 [ORD=49] [ID=19]
            0x855ac3a680: i64 = Register %vreg33 [ORD=49] [ID=7]
          0x855ac3aa80: i64 = mul 0x855acbcda0, 0x855ac37450 [ORD=48] [ID=27]
            0x855acbcda0: i64,ch = load 0x855ac2b1d0, 0x855ac37250, 0x855ac3a380<LD4[%scevgep106], zext from i32> [ORD=47] [ID=26]
              0x855ac37250: i32 = add 0x855ac36640, 0x855ac38960 [ORD=45] [ID=25]
                0x855ac36640: i32 = sub 0x855ac37850, 0x855ac37050 [ORD=44] [ID=24]
                  0x855ac37850: i32 = FrameIndex<0> [ORD=41] [ID=1]
                  0x855ac37050: i32 = shl 0x855acbbc90, 0x855ac3a980 [ORD=44] [ID=23]
                    0x855acbbc90: i32,ch = CopyFromReg 0x855ac2b1d0, 0x855ac36940 [ORD=43] [ID=18]
                      0x855ac36940: i32 = Register %vreg30 [ORD=43] [ID=3]
                    0x855ac3a980: i32 = Constant<2> [ORD=44] [ID=4]
                0x855ac38960: i32 = Constant<8> [ORD=45] [ID=5]
              0x855ac3a380: i32 = undef [ORD=46] [ID=6]
            0x855ac37450: i64 = zero_extend 0x855acbbd90 [ORD=42] [ID=21]
              0x855acbbd90: i32,ch = CopyFromReg 0x855ac2b1d0, 0x855acbba90 [ORD=42] [ID=17]
                0x855acbba90: i32 = Register %vreg31 [ORD=42] [ID=2]
        0x855ac3a080: i64,ch = CopyFromReg 0x855ac2b1d0, 0x855ac3a680 [ORD=49] [ID=19]
          0x855ac3a680: i64 = Register %vreg33 [ORD=49] [ID=7]
      In function: __OpenCL_bitecoin_miner_kernel
      Press any key to continue . . .
      

       

      If I put it into Kernel Analyzer it just freezes.

       

      Any ideas?

       

      The system is:

      Windows 8.1 64-bit, Visual Studio 2013

      HD7970 Driver Version 13.350.1005.0

      Catalyst 14.2

      AMD APP SDK 2.9

       

      Many Thanks

      Henry