I have two kernels, shown in the code area below: one writes 0.0f to the fourth component of the output float4, and the other writes instance().x there for debugging.
The myufy_wrong kernel produces the wrong result!
// Computes, for one element of row `row` of input_fuzzy_numbers, a membership
// value against the reference fuzzy number vec_ref and writes it to myu.
//
// Parameters:
//   row                  row of input_fuzzy_numbers to read.
//   a                    unused by this kernel; kept so callers need not change.
//   input_fuzzy_numbers  2D gather array; element [row][instance().x] is read.
//   vec_ref              reference fuzzy number for this output element.
//   myu                  output: (membership, fuzz1.w, fuzz2.w, 0.0f).
//
// NOTE(review): .x looks like the peak/mean of the fuzzy number and .y/.z its
// right/left bounds, judging from the formulas and the old "fuzzy_mean"
// comment -- confirm against the host code. .w is passed through untouched.
//
// Fix: tp1/tp2 used to be uninitialized, and the final range checks ran even
// when fuzz1.x == fuzz2.x, so the 1.0f result was overwritten using garbage
// values. The checks now run only where tp1/tp2 are computed, and myu is
// written exactly once.
kernel void myufy_wrong(int row, float a, float4 input_fuzzy_numbers[][], float4 vec_ref<>, out float4 myu<>)
{
    int column = instance().x;
    float4 fuzz1 = input_fuzzy_numbers[row][column];
    float4 fuzz2 = vec_ref;
    float tp1 = 0.0f;
    float tp2 = 0.0f;
    float posisi_x = 0.0f;   // x position of the crossing point
    float degree = 0.0f;     // membership written to myu.x; 0 when nothing matches

    if (fuzz1.x == fuzz2.x)
    {
        // The positions are exactly the same: full membership.
        degree = 1.0f;
    }
    else
    {
        if (fuzz1.x > fuzz2.x)
        {
            // Formula a.
            posisi_x = (fuzz1.x - fuzz1.z) * fuzz2.y;
            posisi_x = posisi_x / (fuzz2.y - fuzz2.x);
            posisi_x = posisi_x + fuzz1.z;
            posisi_x = posisi_x / (1.0f + (fuzz1.x - fuzz1.z) / (fuzz2.y - fuzz2.x));
            tp1 = (fuzz1.y - posisi_x) / (fuzz1.y - fuzz1.x);
            tp2 = (fuzz2.y - posisi_x) / (fuzz2.y - fuzz2.x);
        }
        else
        {
            // Formula b (fuzz1.x < fuzz2.x).
            posisi_x = (fuzz2.x - fuzz2.z) * fuzz1.y;
            posisi_x = posisi_x / (fuzz1.y - fuzz1.x);
            posisi_x = posisi_x + fuzz2.z;
            posisi_x = posisi_x / (1.0f + (fuzz2.x - fuzz2.z) / (fuzz1.y - fuzz1.x));
            tp1 = (fuzz1.y - posisi_x) / (fuzz1.y - fuzz1.x);
            tp2 = (fuzz2.x - posisi_x) / (fuzz2.x - fuzz2.z);
        }

        // NOTE(review): the divisors above can be zero for degenerate inputs;
        // presumably the resulting inf/NaN fails both range checks -- confirm.
        // Take the first candidate that lies in (0, 1]; otherwise stay at 0.
        if ((tp1 > 0.0f) && (tp1 <= 1.0f))
        {
            degree = tp1;
        }
        else if ((tp2 > 0.0f) && (tp2 <= 1.0f))
        {
            degree = tp2;
        }
    }

    myu = float4(degree, fuzz1.w, fuzz2.w, 0.0f);
}

// Debug variant of myufy_wrong: same computation, but the fourth output
// component carries the column index (instance().x) instead of 0.0f.
//
// Parameters:
//   row                  row of input_fuzzy_numbers to read.
//   a                    unused by this kernel; kept so callers need not change.
//   input_fuzzy_numbers  2D gather array; element [row][instance().x] is read.
//   vec_ref              reference fuzzy number for this output element.
//   myu                  output: (membership, fuzz1.w, fuzz2.w, (float)column).
//
// Fix: same as in myufy_wrong -- tp1/tp2 are initialized, the range checks run
// only where they are computed, and myu is written exactly once.
kernel void myufy_correct(int row, float a, float4 input_fuzzy_numbers[][], float4 vec_ref<>, out float4 myu<>)
{
    int column = instance().x;
    float4 fuzz1 = input_fuzzy_numbers[row][column];
    float4 fuzz2 = vec_ref;
    float tp1 = 0.0f;
    float tp2 = 0.0f;
    float posisi_x = 0.0f;   // x position of the crossing point
    float degree = 0.0f;     // membership written to myu.x; 0 when nothing matches

    if (fuzz1.x == fuzz2.x)
    {
        // The positions are exactly the same: full membership.
        degree = 1.0f;
    }
    else
    {
        if (fuzz1.x > fuzz2.x)
        {
            // Formula a.
            posisi_x = (fuzz1.x - fuzz1.z) * fuzz2.y;
            posisi_x = posisi_x / (fuzz2.y - fuzz2.x);
            posisi_x = posisi_x + fuzz1.z;
            posisi_x = posisi_x / (1.0f + (fuzz1.x - fuzz1.z) / (fuzz2.y - fuzz2.x));
            tp1 = (fuzz1.y - posisi_x) / (fuzz1.y - fuzz1.x);
            tp2 = (fuzz2.y - posisi_x) / (fuzz2.y - fuzz2.x);
        }
        else
        {
            // Formula b (fuzz1.x < fuzz2.x).
            posisi_x = (fuzz2.x - fuzz2.z) * fuzz1.y;
            posisi_x = posisi_x / (fuzz1.y - fuzz1.x);
            posisi_x = posisi_x + fuzz2.z;
            posisi_x = posisi_x / (1.0f + (fuzz2.x - fuzz2.z) / (fuzz1.y - fuzz1.x));
            tp1 = (fuzz1.y - posisi_x) / (fuzz1.y - fuzz1.x);
            tp2 = (fuzz2.x - posisi_x) / (fuzz2.x - fuzz2.z);
        }

        // NOTE(review): the divisors above can be zero for degenerate inputs;
        // presumably the resulting inf/NaN fails both range checks -- confirm.
        // Take the first candidate that lies in (0, 1]; otherwise stay at 0.
        if ((tp1 > 0.0f) && (tp1 <= 1.0f))
        {
            degree = tp1;
        }
        else if ((tp2 > 0.0f) && (tp2 <= 1.0f))
        {
            degree = tp2;
        }
    }

    myu = float4(degree, fuzz1.w, fuzz2.w, (float)column);
}
What results do you see with CPU backend?
The CPU backend shows the same result for both myufy_wrong and myufy_correct.
The CAL backend is only correct for myufy_correct.
Funnily enough, the printed output is correct, with no error at all, using any backend and either myufy function.
Now it produces the wrong result for either function.
Maybe some memory leak is happening, sigh.