riza.guntur

Help, how to optimize this code?

Discussion created by riza.guntur on Aug 5, 2009
Latest reply on Aug 25, 2009 by riza.guntur

I have finished a correct version of the code, but it runs very slowly compared to the CPU backend or a CPU-only implementation. If it can be made faster, I may move on to bigger problems that use 3D streams for storing images.

The following is my code (I really need help). Thanks for your cooperation.

#include "brookgenfiles/all_functions_first.h"

#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <vector>

using namespace brook;

// ---------------------------------------------------------------------------
// Problem dimensions.
// ---------------------------------------------------------------------------
unsigned int jumlahData = 480;           // number of input rows (samples)
unsigned int jumlahDataSatuOutput = 80;  // number of consecutive rows that share one output class
unsigned int jumlahDiSatuGrup = 5;       // rows folded into one fuzzy number (the kernels hard-code 5)
unsigned int jumlahDimensi = 16;         // values (columns) per row
unsigned int jumlahOutput = jumlahData / jumlahDataSatuOutput;  // number of output classes
unsigned int yA = jumlahData;
unsigned int yB = yA / jumlahDiSatuGrup; // number of fuzzy-number rows
unsigned int yC = 1;                     // how many last columns to be ignored in input file

// Stream extents, written as {width, height}.
unsigned int streamSize[] = {jumlahDimensi, yA};
unsigned int streamSizeReduce[] = {jumlahDimensi, yB};
unsigned int streamSizeReduceRef[] = {jumlahDimensi, jumlahOutput};
unsigned int streamSizeMinOfVecCluster[] = {1, jumlahOutput};
unsigned int streamSizeMaxOfMin[] = {1, 1};

float alpha = 0.05f;  // learning rate; multiplied by 0.9999 after every epoch
float beta = 0.05f;   // not used by the code below
float gamma = 0.05f;  // not used by the code below
unsigned short rank[3] = {0, 1, 2};  // rank[2] is passed as the rank of every stream
int num_of_epoch = 1000;

// Set to true to read the intermediate streams back to the host and print
// them. Every dump is a device-to-host copy, so keep this off when timing.
static const bool kDumpStreams = false;

// Copies a float4 stream of extent {size[0], size[1]} to the host and prints
// it row by row. `fuzzy_layout` selects the print format: true prints
// x, y, z with 8 decimals (fuzzy numbers / reference vectors), false prints
// only x with 8 decimals (membership streams).
static void dump_stream(const char *title, Stream<float4> &stream,
                        const unsigned int *size, bool fuzzy_layout)
{
    std::vector<float4> host(size[0] * size[1]);
    stream.write(&host[0]);

    printf("%s\n", title);
    for (unsigned int i = 0; i < size[1]; i++)
    {
        printf("Row %u: ", i);
        for (unsigned int j = 0; j < size[0]; j++)
        {
            const float4 &v = host[i * size[0] + j];
            if (fuzzy_layout)
            {
                printf("%.8f %.8f %.8f %.0f ", v.x, v.y, v.z, v.w);
            }
            else
            {
                printf("%.8f %.0f %.0f %.0f ", v.x, v.y, v.z, v.w);
            }
        }
        printf("\n\n");
    }
}

// Uploads `input_array` into a float2 stream of extent `input_stream_size`
// and runs max_min_mean on it to build the fuzzy-number stream
// (x = mean, y = max, z = min, w = class label copied from input.y).
//
// Returns a heap-allocated stream of extent `fuzzy_stream_size`; the caller
// owns it and must `delete` it.
Stream<float4> *input_to_fuzzy(float2 *input_array, unsigned int rank,
                               unsigned int *input_stream_size,
                               unsigned int *fuzzy_stream_size)
{
    Stream<float2> input(rank, input_stream_size);  // stream input training
    Stream<float4> *fuzzy_number = new Stream<float4>(rank, fuzzy_stream_size);

    input.read(input_array);
    max_min_mean(input, *fuzzy_number);

    return fuzzy_number;
}

int main(int argc, char* argv[])
{
    (void) argc;
    (void) argv;

    printf("FNLVQ Program\n");

    // Open the input first so a missing file is reported before anything is
    // allocated.
    std::ifstream inFile("480x16.txt");
    if (!inFile)
    {
        std::cout << "Unable to open file";
        return 1;  // terminate with error
    }

    // Host copy of the training data: x = sample value, y = class label.
    std::vector<float2> host_input(jumlahDimensi * yA);
    std::memset(&host_input[0], 0, host_input.size() * sizeof(float2));

    // Each file row holds streamSize[0] values followed by yC trailing
    // columns that are parsed but discarded.
    bool read_ok = true;
    for (unsigned int i = 0; i < streamSize[1]; i++)
    {
        const unsigned int target = i / jumlahDataSatuOutput;
        for (unsigned int j = 0; j < streamSize[0] + yC; j++)
        {
            float value;
            if (!(inFile >> value))
            {
                read_ok = false;  // the slot keeps its zero initialisation
                continue;
            }
            if (j < streamSize[0])
            {
                const unsigned int index = i * streamSize[0] + j;
                host_input[index].x = value;
                host_input[index].y = static_cast<float>(target);
            }
        }
    }
    inFile.close();

    if (!read_ok)
    {
        std::cerr << "Warning: input file is truncated or malformed; "
                     "missing values were left as 0\n";
    }

    Stream<float4> *fuzzy_number =
        input_to_fuzzy(&host_input[0], rank[2], streamSize, streamSizeReduce);

    Stream<float4> vec_ref(rank[2], streamSizeReduceRef);       // stream of vector reference cluster
    Stream<float4> myu(rank[2], streamSizeReduceRef);           // myu streams
    Stream<float4> vec_ref_next(rank[2], streamSizeReduceRef);
    Stream<float4> myu_min(rank[2], streamSizeMinOfVecCluster); // smallest myu of each reference cluster
    Stream<float4> winner(rank[2], streamSizeMaxOfMin);         // biggest of smallest myu

    if (kDumpStreams)
    {
        dump_stream("fuzzy number", *fuzzy_number, streamSizeReduce, true);
    }

    // Seed the reference vectors from the fuzzy numbers.
    copy4(*fuzzy_number, vec_ref);

    if (kDumpStreams)
    {
        dump_stream("reference vector", vec_ref, streamSizeReduceRef, true);
    }

    // Ping-pong between the two reference-vector streams so that every
    // training step starts from the result of the previous one without an
    // extra copy kernel. The original code never fed vec_ref_next back into
    // vec_ref, so the reference vectors were never actually updated.
    Stream<float4> *current_ref = &vec_ref;
    Stream<float4> *next_ref = &vec_ref_next;

    // num_of_epoch * yB iterations, four kernel launches each, on streams of
    // at most jumlahDimensi x jumlahOutput elements.
    // NOTE(review): with streams this small, per-launch overhead probably
    // dominates the run time - confirm with a profiler before tuning kernels.
    for (int epoch = 0; epoch < num_of_epoch; epoch++)
    {
        for (int row = 0; row < (int) yB; row++)
        {
            myufy(row, alpha, *fuzzy_number, *current_ref, myu);
            if (kDumpStreams)
            {
                dump_stream("myu", myu, streamSizeReduceRef, false);
            }

            myu_min_all(myu, myu_min);
            if (kDumpStreams)
            {
                dump_stream("min_of_cluster", myu_min, streamSizeMinOfVecCluster, false);
            }

            myu_max_min(myu_min, winner);
            if (kDumpStreams)
            {
                dump_stream("winner", winner, streamSizeMaxOfMin, false);
            }

            calc_vec_ref_next(row, alpha, winner, *fuzzy_number, *current_ref, *next_ref);
            if (kDumpStreams)
            {
                dump_stream("vector reference next", *next_ref, streamSizeReduceRef, true);
            }

            std::swap(current_ref, next_ref);
        }
        alpha = 0.9999f * alpha;
    }

    delete fuzzy_number;
    return 0;
}

// ---------------------------------------------------------------------------
// Brook+ kernels.
// NOTE(review): presumably these live in the .br source that generates
// brookgenfiles/all_functions_first.h - confirm against the build setup.
// ---------------------------------------------------------------------------

// Folds every group of 5 consecutive input rows into one fuzzy number:
// output = (mean, max, min, label), label taken from the group's first row.
kernel void max_min_mean(float2 input[][], out float4 output<>)
{
    int2 index = instance().xy;

    // Rows 5*y .. 5*y+4 form the group. The previous `i1 = ++i0` chain also
    // modified the earlier index each time, so the first row of the group was
    // skipped and the last one was read twice.
    int i0 = 5 * index.y;
    int i1 = i0 + 1;
    int i2 = i0 + 2;
    int i3 = i0 + 3;
    int i4 = i0 + 4;

    float mean;
    float temp0 = input[i0][index.x].x;
    float temp1 = input[i1][index.x].x;
    float temp2 = input[i2][index.x].x;
    float temp3 = input[i3][index.x].x;
    float temp4 = input[i4][index.x].x;

    float temp_max = temp0;
    float temp_min = temp0;

    temp_max = (temp_max > temp1) ? temp_max : temp1;
    temp_max = (temp_max > temp2) ? temp_max : temp2;
    temp_max = (temp_max > temp3) ? temp_max : temp3;
    temp_max = (temp_max > temp4) ? temp_max : temp4;

    temp_min = (temp_min < temp1) ? temp_min : temp1;
    temp_min = (temp_min < temp2) ? temp_min : temp2;
    temp_min = (temp_min < temp3) ? temp_min : temp3;
    temp_min = (temp_min < temp4) ? temp_min : temp4;

    mean = 0.2f * (temp0 + temp1 + temp2 + temp3 + temp4);

    output = float4(mean, temp_max, temp_min, input[i0][index.x].y);
}

// Same grouping as max_min_mean, but the centre value is the midpoint of the
// group's max and min: output = (0.5 * (max + min), max, min, label).
kernel void max_min_median(float2 input[][], out float4 output<>)
{
    int2 index = instance().xy;

    // Rows 5*y .. 5*y+4 form the group (see max_min_mean for the index fix).
    int i0 = 5 * index.y;
    int i1 = i0 + 1;
    int i2 = i0 + 2;
    int i3 = i0 + 3;
    int i4 = i0 + 4;

    float mid;
    float temp0 = input[i0][index.x].x;
    float temp1 = input[i1][index.x].x;
    float temp2 = input[i2][index.x].x;
    float temp3 = input[i3][index.x].x;
    float temp4 = input[i4][index.x].x;

    float temp_max = temp0;
    float temp_min = temp0;

    temp_max = (temp_max > temp1) ? temp_max : temp1;
    temp_max = (temp_max > temp2) ? temp_max : temp2;
    temp_max = (temp_max > temp3) ? temp_max : temp3;
    temp_max = (temp_max > temp4) ? temp_max : temp4;

    temp_min = (temp_min < temp1) ? temp_min : temp1;
    temp_min = (temp_min < temp2) ? temp_min : temp2;
    temp_min = (temp_min < temp3) ? temp_min : temp3;
    temp_min = (temp_min < temp4) ? temp_min : temp4;

    mid = 0.5f * (temp_max + temp_min);

    output = float4(mid, temp_max, temp_min, input[i0][index.x].y);
}

// Element-wise copy of a float4 stream.
kernel void copy4(float4 input<>, out float4 output<>)
{
    output = input;
}

// Computes the membership value between fuzzy number `row` of
// input_fuzzy_numbers and every reference vector element.
// myu = (membership clamped to [0, 1], input label, reference label, 0).
// `a` is accepted for signature symmetry with calc_vec_ref_next but is not
// used here.
kernel void myufy(int row, float a, float4 input_fuzzy_numbers[][], float4 vec_ref<>, out float4 myu<>)
{
    int column = instance().x;
    float4 fuzz1 = input_fuzzy_numbers[row][column];
    float4 fuzz2 = vec_ref;

    if (fuzz1.x <= fuzz2.x)
    {
        myu = float4(clamp((fuzz1.y - fuzz2.z) / (fuzz2.x - fuzz2.z + fuzz1.y - fuzz1.x), 0.0f, 1.0f),
                     fuzz1.w, fuzz2.w, 0.0f);
    }
    else
    {
        myu = float4(clamp((fuzz2.y - fuzz1.z) / (fuzz1.x - fuzz1.z + fuzz2.y - fuzz2.x), 0.0f, 1.0f),
                     fuzz1.w, fuzz2.w, 0.0f);
    }
}

// Reduction: keeps the element with the smallest membership value (x).
reduce void myu_min_all(float4 myu<>, reduce float4 myu_min<>)
{
    if (myu.x < myu_min.x)
        myu_min = myu;
}

// Reduction: keeps the element with the largest membership value (x).
reduce void myu_max_min(float4 myu_min<>, reduce float4 winner<>)
{
    if (myu_min.x > winner.x)
        winner = myu_min;
}

// Produces the next reference vectors from the current ones.
// Only elements whose label (vec_ref.w) equals the winner's reference label
// (winner.z) are changed; all others are copied through unchanged.
kernel void calc_vec_ref_next(int row, float a, float4 winner[][], float4 input_fuzzy_numbers[][], float4 vec_ref<>, out float4 vec_ref_next<>)
{
    float k = 1.0f - a;
    float n = 0.01f * (1.0f - a);

    // The host creates `winner` as a 1x1 stream, so its only element is
    // [0][0]; the previous [1][1] index was outside the stream.
    float4 temp0 = winner[0][0];
    float4 fuzz1 = input_fuzzy_numbers[row][instance().x];
    float4 fuzz2 = vec_ref;

    if (temp0.z != vec_ref.w)
    {
        vec_ref_next = vec_ref;
    }
    else
    {
        if (temp0.x == 0.0f)
        {
            // Winning membership is zero: keep the centre, widen max/min
            // around it by a factor of 1.1.
            vec_ref_next.xw = fuzz2.xw;
            vec_ref_next.y = fuzz2.x + 1.1f * (fuzz2.y - fuzz2.x);
            vec_ref_next.z = fuzz2.x - 1.1f * (fuzz2.x - fuzz2.z);
        }
        else
        {
            if (fuzz1.w != fuzz2.w)
            {
                // Labels differ: move the centre away from the input.
                vec_ref_next.x = fuzz2.x - a * (1.0f - temp0.x) * (fuzz1.x - fuzz2.x);
                vec_ref_next.y = fuzz2.y + (1.0f - temp0.x) * (1.0f - k) * (fuzz2.y - fuzz2.x);
                vec_ref_next.z = fuzz2.z - (1.0f - temp0.x) * (1.0f - k) * (fuzz2.x - fuzz2.z);
                vec_ref_next.w = fuzz2.w;
            }
            else
            {
                // Labels match: move the centre towards the input.
                vec_ref_next.x = fuzz2.x + a * (1.0f - temp0.x) * (fuzz1.x - fuzz2.x);
                vec_ref_next.y = fuzz2.y + (1.0f + temp0.x) * (1.0f + n) * (fuzz2.y - fuzz2.x);
                vec_ref_next.z = fuzz2.z - (1.0f - temp0.x) * (1.0f - n) * (fuzz2.x - fuzz2.z);
                vec_ref_next.w = fuzz2.w;
            }
        }
    }
}

Outcomes