Hi!
I have a kernel like this:
/* Enable double-precision support on platforms that expose it as an extension. */
#if defined(cl_khr_fp64)
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#elif defined(cl_amd_fp64)
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#endif

/*
 * vzPowx_dc: for each work-item i, raises the real value aSrc[aSrcIdx + i]
 * to the complex power (bRe + i*bIm) and stores the complex result as an
 * interleaved (re, im) pair at Dst[2*(DstIdx + i)] and Dst[2*(DstIdx + i) + 1].
 *
 * The power is evaluated as exp(b * ln(x)) with
 *   ln(x) = log|x| + i * atan2(0, x).
 *
 * Parameters:
 *   aSrc, aSrcIdx  - source buffer of doubles and element offset into it.
 *   bRe, bIm       - real and imaginary part of the exponent.
 *   Dst, DstIdx    - destination buffer and offset, counted in complex elements.
 *   Length         - NOTE(review): treated here as the number of elements to
 *                    process; work-items with index >= Length do nothing.
 *                    Confirm against the host code that this is its meaning.
 */
__kernel void vzPowx_dc(__global const double *aSrc, const int aSrcIdx,
                        const double bRe, const double bIm,
                        __global double *Dst, const int DstIdx,
                        const int Length)
{
    const size_t i = get_global_id(0);

    /* Guard against global sizes rounded up past the data length. */
    if (Length <= 0 || i >= (size_t)Length)
        return;

    const double x = aSrc[aSrcIdx + i];

    /* Complex logarithm of the real input. The 0.0 literal keeps both
       atan2 arguments double so the double-precision overload is used. */
    const double lnRe = log(fabs(x));
    const double lnIm = atan2(0.0, x);

    /* (lnRe + i*lnIm) * (bRe + i*bIm) */
    const double expRe = lnRe * bRe - lnIm * bIm;
    const double expIm = lnIm * bRe + lnRe * bIm;

    /* exp(expRe + i*expIm) = exp(expRe) * (cos(expIm) + i*sin(expIm));
       sincos returns the sine and writes the cosine through the pointer. */
    const double magnitude = exp(expRe);
    double cosPart;
    const double sinPart = sincos(expIm, &cosPart);

    const size_t out = 2 * (DstIdx + i);
    Dst[out]     = cosPart * magnitude;
    Dst[out + 1] = sinPart * magnitude;
}
When this code runs through AMD OpenCL with the CPU as the target device, the results are accurate to only about 4 significant digits (typical of single precision) instead of 8.
Thanks!
Atmapuri