cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

Hellblau
Adept I

Kernel build fails with Dangerous Barrier Opt Detected

Kernels group size gets limited to one wavefront.

Hello,

i tried the 2.1 SDK to check if my OpenCL code will also run on ATI platforms. Sadly i get the following warnings when compiling some of my kernels:

Warning:W001:Dangerous Barrier Opt Detected!
Warning:W000:Barrier caused limited groupsize

This limits the group size to one wavefront which is quite inefficient. I attached the most simple kernel that exibits the problem. This particular kernel writes indices grouped to global memory when a flag is set. The offending barrier is the first CLK_GLOBAL_MEM_FENCE one. When the loop before the barrier is removed the warning goes away.

So any help to work around this problem would be appreciated.

#define REORDER_DIM 256 __kernel /*attribute__( ( reqd_work_group_size( REORDER_DIM, 1, 1 ) ) )*/ void motion_estimation_reorder_candidate_vectors( __global int *pi_indices, __global int *pi_flags, int i_size ) { int i_local_id, i_idx, i_local_idx, i_offset, i_slot_idx, i_flag; volatile int i_dummy; __local unsigned int rgui_indices_block[ REORDER_DIM * 2 ]; __local unsigned int i_indices_offset; i_local_id = get_local_id( 0 ); if( i_local_id == 0 ) { i_indices_offset = 0; } rgui_indices_block[ i_local_id ] = 0; i_slot_idx = i_local_id + REORDER_DIM; for( i_idx = 0; i_idx < i_size; i_idx += REORDER_DIM ) { i_local_idx = min( ( i_local_id + i_idx ), i_size ); pi_indices[ i_local_idx ] = i_size; } barrier(CLK_GLOBAL_MEM_FENCE); for( i_idx = 0; i_idx < i_size; i_idx += REORDER_DIM ) { i_local_idx = i_local_id + i_idx; if( i_local_idx < i_size ) { i_flag = pi_flags[ i_local_idx ]; } else { i_flag = 0; } rgui_indices_block[ i_slot_idx ] = i_flag; for( i_offset = 1; i_offset < REORDER_DIM; i_offset *= 2 ) { unsigned int i_sum; barrier(CLK_LOCAL_MEM_FENCE); i_sum = rgui_indices_block[ i_slot_idx ] + rgui_indices_block[ i_slot_idx - i_offset ]; barrier(CLK_LOCAL_MEM_FENCE); rgui_indices_block[ i_slot_idx ] = i_sum; } if( i_flag ) { unsigned int i_compacted_idx; i_compacted_idx = rgui_indices_block[ i_slot_idx ] + i_indices_offset; pi_indices[ i_compacted_idx ] = i_local_idx; } barrier(CLK_LOCAL_MEM_FENCE); if( i_local_id == REORDER_DIM - 1 ) { i_indices_offset = i_indices_offset + rgui_indices_block[ i_slot_idx ]; } } }

0 Likes
14 Replies

Hellblau,
This is a known issue that we are working on fixing. Try to remove if/else statements around barriers to see if that fixes the issue.

0 Likes

hi, I'm getting those warnings too, but I don't have any barrier inside if/else. what could it be?

0 Likes

mux85,

Could you post your code?

0 Likes

here it is the code of the problematic function.

rgb and average are respectively a structure and a function defined by me

 

kernel void ForegroundEdgesKernel( global read_only rgb * buf_in, global read_only rgb * buf_bg, global write_only rgb * buf_out, global bool * buf_temp, read_only uchar tR, read_only uchar tG, read_only uchar tB) { ushort i = get_global_id(0); ushort j = get_global_id(1); ushort h_out = get_global_size(0); ushort w_out = get_global_size(1); ushort w_in = w_out*2; uint pos_in = w_in*i*2+j*2; uint pos_ul = w_out*(i-1)+(j-1); uint pos_u = pos_ul+1; uint pos_ur = pos_u+1; uint pos_l = pos_ul+w_out; uint pos_out = pos_l+1; uint pos_r = pos_out+1; uint pos_dl = pos_l+w_out; uint pos_d = pos_dl+1; uint pos_dr = pos_d+1; //down-scaling of the frame rgb matr[2][2]; matr[0][0] = buf_in[pos_in]; matr[0][1] = buf_in[pos_in+1]; matr[1][0] = buf_in[pos_in+w_in]; matr[1][1] = buf_in[pos_in+w_in+1]; rgb a = average(matr); //thresholding uchar difR = abs_diff(a.r,buf_bg[pos_out].r); uchar difG = abs_diff(a.g,buf_bg[pos_out].g); uchar difB = abs_diff(a.b,buf_bg[pos_out].b); if(difR>tR || difG>tG || difB>tB) { buf_temp[pos_out] = true; } else { buf_temp[pos_out] = false; } bool morph_temp; //dilation morph_temp=buf_temp[pos_out]; barrier(CLK_GLOBAL_MEM_FENCE); if(i!=0 && i!=get_global_size(0) && j!=0 && j!=get_global_size(1)) { morph_temp = buf_temp[pos_ul] || buf_temp[pos_u] || buf_temp[pos_ur] || buf_temp[pos_l] || buf_temp[pos_out] || buf_temp[pos_r] || buf_temp[pos_dl] || buf_temp[pos_d] || buf_temp[pos_dr]; } barrier(CLK_GLOBAL_MEM_FENCE); buf_temp[pos_out] = morph_temp; //erosion morph_temp=buf_temp[pos_out]; barrier(CLK_GLOBAL_MEM_FENCE); if(i!=0 && i!=get_global_size(0) && j!=0 && j!=get_global_size(1)) { morph_temp = buf_temp[pos_ul] && buf_temp[pos_u] && buf_temp[pos_ur] && buf_temp[pos_l] && buf_temp[pos_out] && buf_temp[pos_r] && buf_temp[pos_dl] && buf_temp[pos_d] && buf_temp[pos_dr]; } barrier(CLK_GLOBAL_MEM_FENCE); buf_temp[pos_out] = morph_temp; //edge detection if(i!=0 && i!=get_global_size(0) && j!=0 && j!=get_global_size(1) && buf_temp[pos_out]) { if(buf_temp[pos_ul] && buf_temp[pos_u] && buf_temp[pos_ur] && buf_temp[pos_l] && buf_temp[pos_r] && buf_temp[pos_dl] && buf_temp[pos_d] && buf_temp[pos_dr]) { buf_out[pos_out].r = buf_out[pos_out].g = buf_out[pos_out].b = 0; } else { buf_out[pos_out].r = buf_out[pos_out].g = buf_out[pos_out].b = 255; } } else { buf_out[pos_out].r = buf_out[pos_out].g = buf_out[pos_out].b = 0; } }

0 Likes

can anyone see where the problem in this code is? thanks

0 Likes

Mux85, the issue is being looked upon by developers. Will get back to you as soon as I get a response from them

0 Likes

ok, thanks

0 Likes

any progress about this issue?

0 Likes

mux85,
This is still not a solved issue and the only work-around is to remove flow control from around the barrier instruction.
i.e.
if
else
endif
barrier
if
else
endif

should turn into something like:
select() for assignment 1
select() for assignment 2
select() for assignment 3
barrier
select() for assignment 4

This might work around the issue.

Remeber, flow control on ATI hardware can be a fairly large performance hit, so you want to eliminate it whenever possible.
0 Likes

thanks for the reply. i think that in this case i can't renounce to do this flow control. i know about the performance hit. does the select function improve perfomance in some way?

0 Likes

Originally posted by: mux85 does the select function improve perfomance in some way?

 

0 Likes

yes

0 Likes

Warning:W000:Barrier caused limited groupsize

 

I'm getting this warning and I have no if near my barriers. What should I do?

 

 

0 Likes

Barsik107,
Without having access to your kernel code, we can't give recommendations on how to fix it.
0 Likes