cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

boxerab
Challenger

Exception calling clBuildProgram on my kernel

This must be karma

My system:

Windows 7 64 bits with HD7700 card

Latest 14.9.2 Beta Driver

When I try to compile the following kernel with clBuildProgram, I get the following exception:

Unhandled exception at 0x000007FED7C9BEA2 (amdocl64.dll) in Roger.exe: 0xC0000005: Access violation reading location 0x0000000000000018.

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

#define CODE_BLOCK_WIDTH 32

#define CODE_BLOCK_HEIGHT 32

#define BOUNDARY 1

#define BOUNDARY_X2 2

#define BUFFER_STRIDE 34

#define BUFFER_SIZE 640

#define CURRENT_LOCATION_OFFSET 0

#define NBH_OFFSET 34

#define SIGMA_OLD_OFFSET 68

#define SIGMA_NEW_OFFSET 102

#define CXD_BUFFER_OFFSET 137

#define INPUT_CODE_BLOCK_SIZE 512

#define STRIPE_COLUMN_MASK 0xF

#define NBH_FULL_MASK 7

#define NBH_END_MASK  8

#define SET_BIT(val,position) (val) = (val) | (1 << (position));

#define CLEAR_BIT(val,position) (val) = (val) & ~(1 << (position));

////////////////////

// Context Variables

#define SPP 0

#define MRP 1

#define CUP 2

#define CUP_RLC 3

void kernel run(global uint* restrict idata, global uint* restrict msb, uint numChannels, uint width, uint height, uint decompositionLevels) {

if (get_global_id(0) >= width || get_global_id(1) >= (height >> 5) )

return;

// state + context-decision buffer

local uint buff[BUFFER_SIZE];

//////////////////////////////////////////////////////////////////////////

// calculate band:

// 0 indicates LL, 1 indicates LH or HL, and 2 indicates HH

int x = get_global_id(0);

int y = get_global_id(1);

int w = width;

    int h = height;

int wNext = w >>1;

int hNext = h >>1;

int band = 0;  // initialize to LL band of highest decomposition level (== decompositionLevels)

// iterator through decomposition level 1 through decompositionLevels-1

for (int i = 1; i < decompositionLevels; ++i) {

band += ( (x >= wNext && x < w) ||( y >= hNext && y < h) ) * (  x/wNext + y/hNext );

x >>=1;

y >>=1;

w = wNext;

h = hNext;

wNext >>= 1;

hNext >>=1;

}

//////////////////////////////////////////////////////////////////////////////

// iterator through all channels

for (int channelNumber = 0; channelNumber < numChannels; ++channelNumber) {

int blockGridX = get_global_id(0) / CODE_BLOCK_WIDTH;

int blockGridY = get_global_id(1)/CODE_BLOCK_HEIGHT;

int blockGridOffset =  blockGridX + blockGridY * get_num_groups(0);

uint msbVal =  (msb[blockGridOffset] >> (channelNumber * 8)) & 0xFF;

if (msbVal == 0)

return;

// pointer to first row of current input code block

global uint* src =  idata + (channelNumber * (get_num_groups(0)*get_num_groups(1)) + blockGridOffset) * INPUT_CODE_BLOCK_SIZE + get_local_id(0);

// read in sign

uint sgn = *src;

src += CODE_BLOCK_WIDTH;

//current code block

uint current = *src;

src += CODE_BLOCK_WIDTH;

uint currentOffset = BOUNDARY + get_local_id(0);

buff[currentOffset] = current;

// nbh is not used in MSB CUP, so no need to initialize

buff[currentOffset + SIGMA_OLD_OFFSET] = current;

buff[currentOffset + SIGMA_NEW_OFFSET] = 0;

// zero out location boundary columns

if (get_local_id(0) == 0 || get_local_id(0) == CODE_BLOCK_WIDTH-1) {

int delta = -1 + ((get_local_id(0)/(CODE_BLOCK_WIDTH-1)) << 1); // -1 or +1

local uint* buffPtr = buff + currentOffset + delta;

buffPtr[0] = 0;

buffPtr += BUFFER_STRIDE;

buffPtr[0] = 0;

buffPtr += BUFFER_STRIDE;

buffPtr[0] = 0;

buffPtr += BUFFER_STRIDE ;

buffPtr[0] = 0;

}

barrier(CLK_LOCAL_MEM_FENCE);

// 2. calculate neighbourhood bits

// (since this is MSB CUP, significance value is equivalent to bit value)

uint top = current >> 1;

uint left = buff[get_local_id(0)];

uint leftTop = left >> 1;

uint leftBottom = left << 1;

int nbhMinusBottom = (top | leftTop | left);

int nbh = nbhMinusBottom | leftBottom;

// 3. CUP on 32 locations in column

for (int i = 0; i < 8; ++i) {

int nbhStripe = (nbh & (NBH_FULL_MASK << (i*4))) | (nbhMinusBottom & (NBH_END_MASK << (i*4)));

int stripeBits = (current & (NBH_FULL_MASK << (i*4))) | (current & (NBH_END_MASK << (i*4)));

if (!nbhStripe) {

if (!stripeBits) {

//RLC entire strip

} else {

// count trailing zeros

uchar ctz = 7-clz(stripeBits & -stripeBits);

}

}

    for (int j = 0; j < 3; ++j) {

}

}

// update sigma old

buff[currentOffset + SIGMA_OLD_OFFSET] = current;

barrier(CLK_LOCAL_MEM_FENCE);

// 5 MQ Coding for CUP

if (get_local_id(0) == 0 && get_local_id(1) == 0) {

local uint* cxdPtr      = buff + CXD_BUFFER_OFFSET;

for (int i = 0; i < CODE_BLOCK_HEIGHT>>2; ++i) {

for (int j = 0; j < CODE_BLOCK_WIDTH; ++j) {

cxdPtr[0] = 0;

cxdPtr++;

}

cxdPtr+= BOUNDARY_X2;

}

}

local char blockVote;

local char zeroBitplane;

for (int i = 0; i < msbVal-2; ++i) {

blockVote = 0;

zeroBitplane = 0;

// 1. read in next bit, update sigma_old, clear sigma_new and calculate neighbourhood

uint current = *src;

src += CODE_BLOCK_WIDTH;

buff[currentOffset] = current;

uint sigmaOld = buff[currentOffset + SIGMA_OLD_OFFSET] | buff[currentOffset + SIGMA_NEW_OFFSET];

buff[currentOffset + SIGMA_OLD_OFFSET] = sigmaOld;

uint sigmaNew = 0;

barrier(CLK_LOCAL_MEM_FENCE);

// 2. preprocess bit plane

// calculate neighbourhood bits

uint left = buff[get_local_id(0)];

uint leftTop = left >> 1;

uint leftBottom = left << 1;

uint right = buff[get_local_id(0) + BOUNDARY_X2];

uint rightTop = right >> 1;

uint rightBottom = right << 1;

uint top = current >> 1;

uint bottom = current << 1;

int nbhMinusBottom =  rightTop | top | leftTop | left |  right;

int nbh  = nbhMinusBottom | leftBottom | bottom | rightBottom;

// 3. update significance on column

int index = 0;

for (int i = 0; i < 8; ++i) {

int nbhStripe = ((nbh & (NBH_FULL_MASK << (i*4))) | (nbhMinusBottom & (NBH_END_MASK << (i*4)))) >> (i*4);

int stripeBits = current >> (i*4);

for (int j = 0; j < 3; ++j) {

  

if ( (stripeBits&1) && (nbhStripe&1) && !(sigmaOld&1) ) {

    SET_BIT(sigmaNew, index);

blockVote = 1;

} else {

CLEAR_BIT(sigmaNew, index);

}

nbhStripe >>= 1;

stripeBits >>= 1;

sigmaOld >>= 1;

index++;

}

}

buff[currentOffset + SIGMA_NEW_OFFSET] = sigmaNew;

while (blockVote) {

blockVote = 0;

barrier(CLK_LOCAL_MEM_FENCE);

}

//2. bpc

for (int i = 0; i < 8; ++i) {

for (int j = 0; j < 3; ++j) {

}

}

   src += CODE_BLOCK_WIDTH;

}

}

}

////////////////////////////////////////////////////////////////////////////////////////////////

0 Likes
12 Replies
dipak
Big Boss

Please can you try to build the kernel without optimization [i.e. pass "-cl-opt-disable" or "-O0" flag during clBuildProgram call] and check whether it works or not?


Regards,

0 Likes

Dipak,

Thanks, but I really don't understand this kind of response from you.

An exception when compiling is clearly a bug in the AMD compiler.

And I have given you a reproducer. So, please try to reproduce it!

Even if I try your suggestion, I will still need to compile the kernel

with optimizations on. So you will still need to reproduce this on your side.

Now, if you do try to reproduce it, and you can't, then that is a different matter, and I will

gladly help to isolate the problem. But, at this stage, please fire up your compiler

and test out my kernel.

Thanks,

Aaron

0 Likes

Hi Aaron,

We really do appreciate you for posting the reproducible code. If we ask you to check something doesn't mean that we're not testing or not willing to test your code. Sometimes few more inputs / extra information from users are greatly helpful to us. Sometimes user confirmation of some testing/checking indicates that we are in same page. We're here to give you our best support but, as you know, its a mutual process.

When I tried to compile your code using CodeXL with 14.9.2 Beta Driver, it failed for Cape Verde (HD 7770) as well as few more devices like Pitcairn, Hawaii, Iceland etc. However, when I set the optimization level "-O0", it worked fine for all devices. Also I tried the same with an earlier driver (released in August may be), and it worked fine with and without optimization. In my previous post, I wanted to check whether you've same observation or not. As it seems a compiler bug, I may need to forward this to compiler team. I'll check a little more and then will file an internal bug report against it.

Thanks again for posting the test case.


Regards,

0 Likes

Hi Dipak,

Thanks for your reply, and I am sorry for my outburst.

I am at a critical point in my kernel development, and really need to able to compile

this code for my card.

I would appreciate your letting me know when this gets fixed.

Thanks!

Aaron

0 Likes

Hi Aaron,

We do understand your situation. We'll definitely keep you updated about the progress.

Regards,

0 Likes

I rolled back to Catalyst 14.8 WHQL, and the kernel will compile.

Unfortunately, when the kernel runs, it completely freezes my system,

requiring a reboot. Happens each time the kernel runs.

The kernel runs fine on my Intel CPU, by the way.

What is the best way of trouble shooting this?  I really really need

to get this running .

Thanks,

Aaron

0 Likes

Its hard to suggest anything from here. I've already filed an internal bug report against this compilation issue. However, it may take sometime to resolve. Meanwhile if you want you can share (may send via mail) your project (host + kernel) with us such that we can run and test it at our end.

Regards,

0 Likes

Thanks, Dipak. This is an open source project,  would you be able to run this on windows with HD7700 card? With 14.8 driver?

If it doesn't crash for you, it will help me isolate the problem.

https://github.com/CodecCentral/roger

This is a visual studio 2012 project, configured for x64.

Thanks!

Aaron

0 Likes

Good news: I reinstalled windows, installed Catalyst 14.8 WHQL, and now I can compile and run my kernel

without any problems.

Thanks for your help.

Aaron

0 Likes

Its nice to hear that its working fine. As I mentioned, I've already filed a bug report against 14.9.2 Beta Driver and if get any update, I'll share with you.


Regards,

0 Likes

Hi Dipak,

Is this compiler bug fixed in the upcoming Catalyst Omega drivers? I want to give these drivers a try,

but I am afraid of this bug coming back to haunt me.

Thanks,

Aaron

0 Likes

It seems working fine with catalyst 14.12 (Omega). Please check and let me know your observation.

Regards,

0 Likes