AnsweredAssumed Answered

Exception calling clBuildProgram on my kernel

Question asked by boxerab on Oct 27, 2014
Latest reply on Dec 10, 2014 by dipak

This must be karma

 

My system:

Windows 7 64 bits with HD7700 card

Latest 14.9.2 Beta Driver

 

 

When I try to compile the following kernel with clBuildProgram, I get the following exception:

 

Unhandled exception at 0x000007FED7C9BEA2 (amdocl64.dll) in Roger.exe: 0xC0000005: Access violation reading location 0x0000000000000018.

 

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

#define CODE_BLOCK_WIDTH 32

#define CODE_BLOCK_HEIGHT 32

 

#define BOUNDARY 1

#define BOUNDARY_X2 2

 

#define BUFFER_STRIDE 34

#define BUFFER_SIZE 640

 

#define CURRENT_LOCATION_OFFSET 0

#define NBH_OFFSET 34

#define SIGMA_OLD_OFFSET 68

#define SIGMA_NEW_OFFSET 102

#define CXD_BUFFER_OFFSET 137

 

#define INPUT_CODE_BLOCK_SIZE 512

 

#define STRIPE_COLUMN_MASK 0xF

#define NBH_FULL_MASK 7

#define NBH_END_MASK  8

 

#define SET_BIT(val,position) (val) = (val) | (1 << (position));

#define CLEAR_BIT(val,position) (val) = (val) & ~(1 << (position));

 

////////////////////

// Context Variables

 

#define SPP 0

#define MRP 1

#define CUP 2

#define CUP_RLC 3

 

void kernel run(global uint* restrict idata, global uint* restrict msb, uint numChannels, uint width, uint height, uint decompositionLevels) {

 

if (get_global_id(0) >= width || get_global_id(1) >= (height >> 5) )

return;

 

// state + context-decision buffer

local uint buff[BUFFER_SIZE];

 

//////////////////////////////////////////////////////////////////////////

// calculate band:

// 0 indicates LL, 1 indicates LH or HL, and 2 indicates HH

int x = get_global_id(0);

int y = get_global_id(1);

int w = width;

    int h = height;

int wNext = w >>1;

int hNext = h >>1;

 

int band = 0;  // initialize to LL band of highest decomposition level (== decompositionLevels)

 

// iterator through decomposition level 1 through decompositionLevels-1

for (int i = 1; i < decompositionLevels; ++i) {

band += ( (x >= wNext && x < w) ||( y >= hNext && y < h) ) * (  x/wNext + y/hNext );

x >>=1;

y >>=1;

w = wNext;

h = hNext;

wNext >>= 1;

hNext >>=1;

}

//////////////////////////////////////////////////////////////////////////////

 

// iterator through all channels

for (int channelNumber = 0; channelNumber < numChannels; ++channelNumber) {

int blockGridX = get_global_id(0) / CODE_BLOCK_WIDTH;

int blockGridY = get_global_id(1)/CODE_BLOCK_HEIGHT;

int blockGridOffset =  blockGridX + blockGridY * get_num_groups(0);

 

uint msbVal =  (msb[blockGridOffset] >> (channelNumber * 8)) & 0xFF;

if (msbVal == 0)

return;

 

// pointer to first row of current input code block

global uint* src =  idata + (channelNumber * (get_num_groups(0)*get_num_groups(1)) + blockGridOffset) * INPUT_CODE_BLOCK_SIZE + get_local_id(0);

 

// read in sign

uint sgn = *src;

src += CODE_BLOCK_WIDTH;

 

 

//current code block

uint current = *src;

src += CODE_BLOCK_WIDTH;

uint currentOffset = BOUNDARY + get_local_id(0);

 

buff[currentOffset] = current;

// nbh is not used in MSB CUP, so no need to initialize

buff[currentOffset + SIGMA_OLD_OFFSET] = current;

buff[currentOffset + SIGMA_NEW_OFFSET] = 0;

 

// zero out location boundary columns

if (get_local_id(0) == 0 || get_local_id(0) == CODE_BLOCK_WIDTH-1) {

 

int delta = -1 + ((get_local_id(0)/(CODE_BLOCK_WIDTH-1)) << 1); // -1 or +1

local uint* buffPtr = buff + currentOffset + delta;

 

buffPtr[0] = 0;

buffPtr += BUFFER_STRIDE;

buffPtr[0] = 0;

buffPtr += BUFFER_STRIDE;

buffPtr[0] = 0;

buffPtr += BUFFER_STRIDE ;

buffPtr[0] = 0;

}

 

barrier(CLK_LOCAL_MEM_FENCE);

 

 

// 2. calculate neighbourhood bits

// (since this is MSB CUP, significance value is equivalent to bit value)

uint top = current >> 1;

uint left = buff[get_local_id(0)];

uint leftTop = left >> 1;

uint leftBottom = left << 1;

 

 

int nbhMinusBottom = (top | leftTop | left);

int nbh = nbhMinusBottom | leftBottom;

 

// 3. CUP on 32 locations in column

for (int i = 0; i < 8; ++i) {

 

int nbhStripe = (nbh & (NBH_FULL_MASK << (i*4))) | (nbhMinusBottom & (NBH_END_MASK << (i*4)));

int stripeBits = (current & (NBH_FULL_MASK << (i*4))) | (current & (NBH_END_MASK << (i*4)));

if (!nbhStripe) {

if (!stripeBits) {

//RLC entire strip

 

} else {

// count trailing zeros

uchar ctz = 7-clz(stripeBits & -stripeBits);

}

}

    for (int j = 0; j < 3; ++j) {

}

}

 

// update sigma old

buff[currentOffset + SIGMA_OLD_OFFSET] = current;

barrier(CLK_LOCAL_MEM_FENCE);

 

// 5 MQ Coding for CUP

if (get_local_id(0) == 0 && get_local_id(1) == 0) {

local uint* cxdPtr      = buff + CXD_BUFFER_OFFSET;

for (int i = 0; i < CODE_BLOCK_HEIGHT>>2; ++i) {

for (int j = 0; j < CODE_BLOCK_WIDTH; ++j) {

cxdPtr[0] = 0;

cxdPtr++;

}

cxdPtr+= BOUNDARY_X2;

}

 

}

 

local char blockVote;

local char zeroBitplane;

 

for (int i = 0; i < msbVal-2; ++i) {

 

blockVote = 0;

zeroBitplane = 0;

 

// 1. read in next bit, update sigma_old, clear sigma_new and calculate neighbourhood

uint current = *src;

src += CODE_BLOCK_WIDTH;

buff[currentOffset] = current;

uint sigmaOld = buff[currentOffset + SIGMA_OLD_OFFSET] | buff[currentOffset + SIGMA_NEW_OFFSET];

buff[currentOffset + SIGMA_OLD_OFFSET] = sigmaOld;

uint sigmaNew = 0;

 

barrier(CLK_LOCAL_MEM_FENCE);

 

// 2. preprocess bit plane

 

// calculate neighbourhood bits

uint left = buff[get_local_id(0)];

uint leftTop = left >> 1;

uint leftBottom = left << 1;

 

uint right = buff[get_local_id(0) + BOUNDARY_X2];

uint rightTop = right >> 1;

uint rightBottom = right << 1;

 

uint top = current >> 1;

uint bottom = current << 1;

 

int nbhMinusBottom =  rightTop | top | leftTop | left |  right;

int nbh  = nbhMinusBottom | leftBottom | bottom | rightBottom;

 

// 3. update significance on column

int index = 0;

for (int i = 0; i < 8; ++i) {

int nbhStripe = ((nbh & (NBH_FULL_MASK << (i*4))) | (nbhMinusBottom & (NBH_END_MASK << (i*4)))) >> (i*4);

int stripeBits = current >> (i*4);

for (int j = 0; j < 3; ++j) {

  

if ( (stripeBits&1) && (nbhStripe&1) && !(sigmaOld&1) ) {

    SET_BIT(sigmaNew, index);

blockVote = 1;

} else {

CLEAR_BIT(sigmaNew, index);

}

nbhStripe >>= 1;

stripeBits >>= 1;

sigmaOld >>= 1;

index++;

}

}

buff[currentOffset + SIGMA_NEW_OFFSET] = sigmaNew;

while (blockVote) {

 

blockVote = 0;

barrier(CLK_LOCAL_MEM_FENCE);

 

}

 

//2. bpc

for (int i = 0; i < 8; ++i) {

for (int j = 0; j < 3; ++j) {

}

}

   src += CODE_BLOCK_WIDTH;

}

}

}

 

////////////////////////////////////////////////////////////////////////////////////////////////

Outcomes