This must be karma
My system:
Windows 7 64 bits with HD7700 card
Latest 14.9.2 Beta Driver
When I try to compile the following kernel with clBuildProgram, I get the following exception:
Unhandled exception at 0x000007FED7C9BEA2 (amdocl64.dll) in Roger.exe: 0xC0000005: Access violation reading location 0x0000000000000018.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define CODE_BLOCK_WIDTH 32
#define CODE_BLOCK_HEIGHT 32
#define BOUNDARY 1
#define BOUNDARY_X2 2
#define BUFFER_STRIDE 34
#define BUFFER_SIZE 640
#define CURRENT_LOCATION_OFFSET 0
#define NBH_OFFSET 34
#define SIGMA_OLD_OFFSET 68
#define SIGMA_NEW_OFFSET 102
#define CXD_BUFFER_OFFSET 137
#define INPUT_CODE_BLOCK_SIZE 512
#define STRIPE_COLUMN_MASK 0xF
#define NBH_FULL_MASK 7
#define NBH_END_MASK 8
#define SET_BIT(val,position) (val) = (val) | (1 << (position));
#define CLEAR_BIT(val,position) (val) = (val) & ~(1 << (position));
////////////////////
// Context Variables
#define SPP 0
#define MRP 1
#define CUP 2
#define CUP_RLC 3
void kernel run(global uint* restrict idata, global uint* restrict msb, uint numChannels, uint width, uint height, uint decompositionLevels) {
if (get_global_id(0) >= width || get_global_id(1) >= (height >> 5) )
return;
// state + context-decision buffer
local uint buff[BUFFER_SIZE];
//////////////////////////////////////////////////////////////////////////
// calculate band:
// 0 indicates LL, 1 indicates LH or HL, and 2 indicates HH
int x = get_global_id(0);
int y = get_global_id(1);
int w = width;
int h = height;
int wNext = w >>1;
int hNext = h >>1;
int band = 0; // initialize to LL band of highest decomposition level (== decompositionLevels)
// iterator through decomposition level 1 through decompositionLevels-1
for (int i = 1; i < decompositionLevels; ++i) {
band += ( (x >= wNext && x < w) ||( y >= hNext && y < h) ) * ( x/wNext + y/hNext );
x >>=1;
y >>=1;
w = wNext;
h = hNext;
wNext >>= 1;
hNext >>=1;
}
//////////////////////////////////////////////////////////////////////////////
// iterator through all channels
for (int channelNumber = 0; channelNumber < numChannels; ++channelNumber) {
int blockGridX = get_global_id(0) / CODE_BLOCK_WIDTH;
int blockGridY = get_global_id(1)/CODE_BLOCK_HEIGHT;
int blockGridOffset = blockGridX + blockGridY * get_num_groups(0);
uint msbVal = (msb[blockGridOffset] >> (channelNumber * 8)) & 0xFF;
if (msbVal == 0)
return;
// pointer to first row of current input code block
global uint* src = idata + (channelNumber * (get_num_groups(0)*get_num_groups(1)) + blockGridOffset) * INPUT_CODE_BLOCK_SIZE + get_local_id(0);
// read in sign
uint sgn = *src;
src += CODE_BLOCK_WIDTH;
//current code block
uint current = *src;
src += CODE_BLOCK_WIDTH;
uint currentOffset = BOUNDARY + get_local_id(0);
buff[currentOffset] = current;
// nbh is not used in MSB CUP, so no need to initialize
buff[currentOffset + SIGMA_OLD_OFFSET] = current;
buff[currentOffset + SIGMA_NEW_OFFSET] = 0;
// zero out location boundary columns
if (get_local_id(0) == 0 || get_local_id(0) == CODE_BLOCK_WIDTH-1) {
int delta = -1 + ((get_local_id(0)/(CODE_BLOCK_WIDTH-1)) << 1); // -1 or +1
local uint* buffPtr = buff + currentOffset + delta;
buffPtr[0] = 0;
buffPtr += BUFFER_STRIDE;
buffPtr[0] = 0;
buffPtr += BUFFER_STRIDE;
buffPtr[0] = 0;
buffPtr += BUFFER_STRIDE ;
buffPtr[0] = 0;
}
barrier(CLK_LOCAL_MEM_FENCE);
// 2. calculate neighbourhood bits
// (since this is MSB CUP, significance value is equivalent to bit value)
uint top = current >> 1;
uint left = buff[get_local_id(0)];
uint leftTop = left >> 1;
uint leftBottom = left << 1;
int nbhMinusBottom = (top | leftTop | left);
int nbh = nbhMinusBottom | leftBottom;
// 3. CUP on 32 locations in column
for (int i = 0; i < 8; ++i) {
int nbhStripe = (nbh & (NBH_FULL_MASK << (i*4))) | (nbhMinusBottom & (NBH_END_MASK << (i*4)));
int stripeBits = (current & (NBH_FULL_MASK << (i*4))) | (current & (NBH_END_MASK << (i*4)));
if (!nbhStripe) {
if (!stripeBits) {
//RLC entire strip
} else {
// count trailing zeros
uchar ctz = 7-clz(stripeBits & -stripeBits);
}
}
for (int j = 0; j < 3; ++j) {
}
}
// update sigma old
buff[currentOffset + SIGMA_OLD_OFFSET] = current;
barrier(CLK_LOCAL_MEM_FENCE);
// 5 MQ Coding for CUP
if (get_local_id(0) == 0 && get_local_id(1) == 0) {
local uint* cxdPtr = buff + CXD_BUFFER_OFFSET;
for (int i = 0; i < CODE_BLOCK_HEIGHT>>2; ++i) {
for (int j = 0; j < CODE_BLOCK_WIDTH; ++j) {
cxdPtr[0] = 0;
cxdPtr++;
}
cxdPtr+= BOUNDARY_X2;
}
}
local char blockVote;
local char zeroBitplane;
for (int i = 0; i < msbVal-2; ++i) {
blockVote = 0;
zeroBitplane = 0;
// 1. read in next bit, update sigma_old, clear sigma_new and calculate neighbourhood
uint current = *src;
src += CODE_BLOCK_WIDTH;
buff[currentOffset] = current;
uint sigmaOld = buff[currentOffset + SIGMA_OLD_OFFSET] | buff[currentOffset + SIGMA_NEW_OFFSET];
buff[currentOffset + SIGMA_OLD_OFFSET] = sigmaOld;
uint sigmaNew = 0;
barrier(CLK_LOCAL_MEM_FENCE);
// 2. preprocess bit plane
// calculate neighbourhood bits
uint left = buff[get_local_id(0)];
uint leftTop = left >> 1;
uint leftBottom = left << 1;
uint right = buff[get_local_id(0) + BOUNDARY_X2];
uint rightTop = right >> 1;
uint rightBottom = right << 1;
uint top = current >> 1;
uint bottom = current << 1;
int nbhMinusBottom = rightTop | top | leftTop | left | right;
int nbh = nbhMinusBottom | leftBottom | bottom | rightBottom;
// 3. update significance on column
int index = 0;
for (int i = 0; i < 8; ++i) {
int nbhStripe = ((nbh & (NBH_FULL_MASK << (i*4))) | (nbhMinusBottom & (NBH_END_MASK << (i*4)))) >> (i*4);
int stripeBits = current >> (i*4);
for (int j = 0; j < 3; ++j) {
if ( (stripeBits&1) && (nbhStripe&1) && !(sigmaOld&1) ) {
SET_BIT(sigmaNew, index);
blockVote = 1;
} else {
CLEAR_BIT(sigmaNew, index);
}
nbhStripe >>= 1;
stripeBits >>= 1;
sigmaOld >>= 1;
index++;
}
}
buff[currentOffset + SIGMA_NEW_OFFSET] = sigmaNew;
while (blockVote) {
blockVote = 0;
barrier(CLK_LOCAL_MEM_FENCE);
}
//2. bpc
for (int i = 0; i < 8; ++i) {
for (int j = 0; j < 3; ++j) {
}
}
src += CODE_BLOCK_WIDTH;
}
}
}
////////////////////////////////////////////////////////////////////////////////////////////////