cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

maximmoroz
Journeyman III

KernelAnalyzer crashes with Catalyst 11.6

R6950, Catalyst 11.6, AMD APP 2.4, Windows 7 Ultimate

I am attaching the code it crashes with (I picked the simplest one). If I specify this kernel in clBuildProgram, my application just crashes.

If I supply the analyzer with simpler code, it says "OpenCL Compile Error: Can't find the IL for Cayman".

The kernels (the ones which are compiled) went wild with 11.6 and produce wrong results.

I didn't experience any of this with Catalyst 11.5.

The sad part is that I am unable to roll back to 11.5. Well, CCC shows 11.5, but all the problems remain. And I am unable to roll back to 11.4 at all: I install 11.4 and the system still shows that I have 11.5.

#define width 96
#define height 96
#define window_width 13
#define window_height 13
#define feature_map_count 2
#define input_entry_size 18432
#define intermediate_entry_size 9216

/*
 * One work-item per output element. For the element at (row, column) of an
 * entry, accumulates a weighted sum of intermediate2 over a
 * window_height x window_width window centred on that element and stores
 * the result at the same position in intermediate.
 *
 *   intermediate2   values that are read (width * height floats per entry)
 *   intermediate    destination buffer, one float written per work-item
 *   weights         window coefficients, row-major, window_width per row
 *   adjustments     not referenced by this kernel
 *   dimension_size  number of valid work-items; ids at or beyond it do nothing
 *
 * feature_map_count and input_entry_size are defined above but are not
 * referenced by the kernel body.
 */
__kernel void SubtractiveLocalContrastAverage(
    const __global float * restrict intermediate2,
    __global float * restrict intermediate,
    const __global float * restrict weights,
    const __global float * restrict adjustments,
    const uint dimension_size)
{
    const uint gid = get_global_id(0);
    if (gid >= dimension_size)
        return;

    // Split the flat id into an entry index and a position inside the entry.
    const uint entry = gid / (width * height);
    const uint pixel = gid % (width * height);

    // Top-left corner of the window; may be negative near the borders.
    const int top = (int)(pixel / width) - (int)(window_height >> 1);
    const int left = (int)(pixel % width) - (int)(window_width >> 1);

    const uint entry_base = intermediate_entry_size * entry;

    float acc = 0.0F;
    for (uint wy = 0; wy < window_height; ++wy)
    {
        const int row = top + (int)wy;

        // Window rows that fall outside the entry contribute nothing.
        if ((row < 0) || (row >= height))
            continue;

        const uint row_base = row * width + entry_base;
        const uint weight_row = wy * window_width;

        for (uint wx = 0; wx < window_width; ++wx)
        {
            int col = left + (int)wx;
            float tap = weights[weight_row + wx];

            const int not_before = (col >= 0);
            const int not_after = (col < width);

            // Out-of-range columns get a zero coefficient ...
            if (!(not_before & not_after))
                tap = 0.0F;

            // ... and their index is shifted by one row width so the read
            // still lands inside [0, width) of the current row.
            if (!not_before)
                col += width;
            if (!not_after)
                col -= width;

            acc += intermediate2[row_base + col] * tap;
        }
    }

    intermediate[intermediate_entry_size * entry + pixel] = acc;
}

0 Likes
3 Replies
katayama
Journeyman III

If I supply the analyzer with simpler code, it says "OpenCL Compile Error: Can't find the IL for Cayman".


This problem has a workaround. Please see http://forums.amd.com/devforum/messageview.cfm?catid=390&threadid=151770&messid=1244787&parentid=124...

0 Likes

katayama, thank you, it helps. The other issues are still unresolved.

0 Likes
maximmoroz
Journeyman III

I managed to compile the kernel I specified in the post by using #pragma unroll. But I failed to build the kernel with the code below.

#define output_width 92
#define output_height 92
#define output_feature_map_count 12
#define input_feature_map_count 2
#define source_width 5
#define source_height 5
#define input_width 96
#define input_height 96
#define weights_size 2400
#define weights_count 600
#define weights_pairs_size 192
#define biases_size 48
#define weights_offsets_size 96
#define output_entry_size 101568
#define input_entry_size 18432

/*
 * Two-dimensional launch: global id 0 selects (entry, output position),
 * global id 1 selects the output feature map. Each work-item starts from the
 * bias of its output feature map and adds, for every input feature map whose
 * entry in weights_offsets is non-negative, the sum of products of a
 * source_height x source_width patch of input with the coefficients found in
 * weights starting at that offset.
 *
 *   input            values that are read, input_entry_size floats per entry
 *   output           destination, output_entry_size floats per entry
 *   weights          coefficient storage, indexed through weights_offsets
 *   weights_offsets  input_feature_map_count ints per output feature map;
 *                    a negative value skips that input feature map
 *   biases           one float per output feature map
 *   entry_count      number of valid entries; work-items whose entry index is
 *                    not below it are redirected to entry 0 instead of
 *                    exiting, so they still read and write entry 0's data
 *
 * Several of the macros above (output_feature_map_count, weights_size,
 * weights_count, weights_pairs_size, biases_size, weights_offsets_size) are
 * not referenced by the kernel body.
 */
__kernel void Convolution(
    const __global float * restrict input,
    __global float * restrict output,
    const __global float * restrict weights,
    const __global int * restrict weights_offsets,
    const __global float * restrict biases,
    const uint entry_count)
{
    const uint gid = get_global_id(0);
    const uint out_map = get_global_id(1);

    // Position inside one output feature map, and the entry it belongs to.
    const uint out_pos = gid % (output_width * output_height);
    uint entry = gid / (output_width * output_height);
    if (!(entry < entry_count))
        entry = 0;

    const uint out_row = out_pos / output_width;
    const uint out_col = out_pos % output_width;

    // Index of the patch's top-left element in the first input feature map.
    const uint patch_origin = (entry * input_entry_size) + (out_row * input_width) + out_col;
    const uint offsets_row = out_map * input_feature_map_count;

    float acc = biases[out_map];

    for (uint in_map = 0; in_map < input_feature_map_count; ++in_map)
    {
        const int coeff_start = weights_offsets[offsets_row + in_map];
        if (coeff_start < 0)
            continue;

        // Consecutive input feature maps are input_width * input_height apart.
        const uint map_origin = patch_origin + in_map * (input_width * input_height);

        for (uint ky = 0; ky < source_height; ++ky)
        {
            const uint in_row = map_origin + ky * input_width;
            const uint coeff_row = (uint)coeff_start + ky * source_width;

            for (uint kx = 0; kx < source_width; ++kx)
            {
                acc += input[in_row + kx] * weights[coeff_row + kx];
            }
        }
    }

    output[(entry * output_entry_size) + (out_map * output_width * output_height) + out_pos] = acc;
}

0 Likes