Answered (Assumed Answered)

clBuildProgram prints warnings when compiling for RDNA

Question asked by elad on Feb 6, 2020
Latest reply on Feb 7, 2020 by dipak

I am using a Radeon Pro W5700 to run kernels produced by the clFFT library.

 

When clFFT compiles its kernels, calling clBuildProgram appears to print an unspecified warning to the console output:

 

"1 warning generated"

Here is the output when using the RGA tool to compile the kernels generated by clFFT:

Building for gfx1010... 1 warning generated.
succeeded.
Building for gfx1012... 1 warning generated.
succeeded.

 

1. Is there a way to tell what causes clBuildProgram to print these warnings? It also seems that printing to the console is not intended behavior for clBuildProgram.

 

Here is the full kernel code in case someone else wishes to try:

 

 

/* ************************************************************************
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ************************************************************************/


/*
 * Twiddle-factor table read by the FwdPass1, FwdPass2 and FwdPass3 helpers.
 *
 * Every entry is a unit-magnitude (cos(t), -sin(t)) pair, i.e. exp(-i*t)
 * when a float2 is read as (real, imaginary).
 *
 * Layout as indexed by the passes: for L in {4, 16, 64} a group starts at
 * index L - 1 and holds three consecutive entries for each k = me % L.
 * Sampled values match exp(-2*pi*i * j*k / (4*L)) for j = 1, 2, 3.
 * Entries 0..2 are not referenced by the code visible in this file.
 */
__constant float2 twiddles[255] = {
(float2)(1.0000000000000000000000000000000000e+00f, -0.0000000000000000000000000000000000e+00f),
(float2)(1.0000000000000000000000000000000000e+00f, -0.0000000000000000000000000000000000e+00f),
(float2)(1.0000000000000000000000000000000000e+00f, -0.0000000000000000000000000000000000e+00f),
(float2)(1.0000000000000000000000000000000000e+00f, -0.0000000000000000000000000000000000e+00f),
(float2)(1.0000000000000000000000000000000000e+00f, -0.0000000000000000000000000000000000e+00f),
(float2)(1.0000000000000000000000000000000000e+00f, -0.0000000000000000000000000000000000e+00f),
(float2)(9.2387953251128673848313610506011173e-01f, -3.8268343236508978177923268049198668e-01f),
(float2)(7.0710678118654757273731092936941423e-01f, -7.0710678118654757273731092936941423e-01f),
(float2)(3.8268343236508983729038391174981371e-01f, -9.2387953251128673848313610506011173e-01f),
(float2)(7.0710678118654757273731092936941423e-01f, -7.0710678118654757273731092936941423e-01f),
(float2)(6.1232339957367660358688201472919830e-17f, -1.0000000000000000000000000000000000e+00f),
(float2)(-7.0710678118654746171500846685376018e-01f, -7.0710678118654757273731092936941423e-01f),
(float2)(3.8268343236508983729038391174981371e-01f, -9.2387953251128673848313610506011173e-01f),
(float2)(-7.0710678118654746171500846685376018e-01f, -7.0710678118654757273731092936941423e-01f),
(float2)(-9.2387953251128684950543856757576577e-01f, 3.8268343236508967075693021797633264e-01f),
(float2)(1.0000000000000000000000000000000000e+00f, -0.0000000000000000000000000000000000e+00f),
(float2)(1.0000000000000000000000000000000000e+00f, -0.0000000000000000000000000000000000e+00f),
(float2)(1.0000000000000000000000000000000000e+00f, -0.0000000000000000000000000000000000e+00f),
(float2)(9.9518472667219692873175063141388819e-01f, -9.8017140329560603628777926132897846e-02f),
(float2)(9.8078528040323043057924223830923438e-01f, -1.9509032201612824808378832130983938e-01f),
(float2)(9.5694033573220882438192802510457113e-01f, -2.9028467725446233105301985233381856e-01f),
(float2)(9.8078528040323043057924223830923438e-01f, -1.9509032201612824808378832130983938e-01f),
(float2)(9.2387953251128673848313610506011173e-01f, -3.8268343236508978177923268049198668e-01f),
(float2)(8.3146961230254523567140267914510332e-01f, -5.5557023301960217764872140833176672e-01f),
(float2)(9.5694033573220882438192802510457113e-01f, -2.9028467725446233105301985233381856e-01f),
(float2)(8.3146961230254523567140267914510332e-01f, -5.5557023301960217764872140833176672e-01f),
(float2)(6.3439328416364548779426968394545838e-01f, -7.7301045336273688235451118089258671e-01f),
(float2)(9.2387953251128673848313610506011173e-01f, -3.8268343236508978177923268049198668e-01f),
(float2)(7.0710678118654757273731092936941423e-01f, -7.0710678118654757273731092936941423e-01f),
(float2)(3.8268343236508983729038391174981371e-01f, -9.2387953251128673848313610506011173e-01f),
(float2)(8.8192126434835504955600526955095120e-01f, -4.7139673682599764203970948983624112e-01f),
(float2)(5.5557023301960228867102387084742077e-01f, -8.3146961230254523567140267914510332e-01f),
(float2)(9.8017140329560770162231619906378910e-02f, -9.9518472667219681770944816889823414e-01f),
(float2)(8.3146961230254523567140267914510332e-01f, -5.5557023301960217764872140833176672e-01f),
(float2)(3.8268343236508983729038391174981371e-01f, -9.2387953251128673848313610506011173e-01f),
(float2)(-1.9509032201612819257263709005201235e-01f, -9.8078528040323043057924223830923438e-01f),
(float2)(7.7301045336273699337681364340824075e-01f, -6.3439328416364548779426968394545838e-01f),
(float2)(1.9509032201612833135051516819657991e-01f, -9.8078528040323043057924223830923438e-01f),
(float2)(-4.7139673682599769755086072109406814e-01f, -8.8192126434835504955600526955095120e-01f),
(float2)(7.0710678118654757273731092936941423e-01f, -7.0710678118654757273731092936941423e-01f),
(float2)(6.1232339957367660358688201472919830e-17f, -1.0000000000000000000000000000000000e+00f),
(float2)(-7.0710678118654746171500846685376018e-01f, -7.0710678118654757273731092936941423e-01f),
(float2)(6.3439328416364548779426968394545838e-01f, -7.7301045336273688235451118089258671e-01f),
(float2)(-1.9509032201612819257263709005201235e-01f, -9.8078528040323043057924223830923438e-01f),
(float2)(-8.8192126434835493853370280703529716e-01f, -4.7139673682599780857316318360972218e-01f),
(float2)(5.5557023301960228867102387084742077e-01f, -8.3146961230254523567140267914510332e-01f),
(float2)(-3.8268343236508972626808144923415966e-01f, -9.2387953251128673848313610506011173e-01f),
(float2)(-9.8078528040323043057924223830923438e-01f, -1.9509032201612860890627132448571501e-01f),
(float2)(4.7139673682599780857316318360972218e-01f, -8.8192126434835493853370280703529716e-01f),
(float2)(-5.5557023301960195560411648330045864e-01f, -8.3146961230254534669370514166075736e-01f),
(float2)(-9.9518472667219692873175063141388819e-01f, 9.8017140329560145661780268255824922e-02f),
(float2)(3.8268343236508983729038391174981371e-01f, -9.2387953251128673848313610506011173e-01f),
(float2)(-7.0710678118654746171500846685376018e-01f, -7.0710678118654757273731092936941423e-01f),
(float2)(-9.2387953251128684950543856757576577e-01f, 3.8268343236508967075693021797633264e-01f),
(float2)(2.9028467725446233105301985233381856e-01f, -9.5694033573220893540423048762022518e-01f),
(float2)(-8.3146961230254534669370514166075736e-01f, -5.5557023301960217764872140833176672e-01f),
(float2)(-7.7301045336273688235451118089258671e-01f, 6.3439328416364559881657214646111242e-01f),
(float2)(1.9509032201612833135051516819657991e-01f, -9.8078528040323043057924223830923438e-01f),
(float2)(-9.2387953251128673848313610506011173e-01f, -3.8268343236508989280153514300764073e-01f),
(float2)(-5.5557023301960217764872140833176672e-01f, 8.3146961230254523567140267914510332e-01f),
(float2)(9.8017140329560770162231619906378910e-02f, -9.9518472667219681770944816889823414e-01f),
(float2)(-9.8078528040323043057924223830923438e-01f, -1.9509032201612860890627132448571501e-01f),
(float2)(-2.9028467725446327474259078371687792e-01f, 9.5694033573220860233732310007326305e-01f),
(float2)(1.0000000000000000000000000000000000e+00f, -0.0000000000000000000000000000000000e+00f),
(float2)(1.0000000000000000000000000000000000e+00f, -0.0000000000000000000000000000000000e+00f),
(float2)(1.0000000000000000000000000000000000e+00f, -0.0000000000000000000000000000000000e+00f),
(float2)(9.9969881869620424996725205346592702e-01f, -2.4541228522912288123603019585061702e-02f),
(float2)(9.9879545620517240500646494183456525e-01f, -4.9067674327418014934565348994510714e-02f),
(float2)(9.9729045667869020697082760307239369e-01f, -7.3564563599667426307959772202593740e-02f),
(float2)(9.9879545620517240500646494183456525e-01f, -4.9067674327418014934565348994510714e-02f),
(float2)(9.9518472667219692873175063141388819e-01f, -9.8017140329560603628777926132897846e-02f),
(float2)(9.8917650996478101443898367506335489e-01f, -1.4673047445536174793190298260014970e-01f),
(float2)(9.9729045667869020697082760307239369e-01f, -7.3564563599667426307959772202593740e-02f),
(float2)(9.8917650996478101443898367506335489e-01f, -1.4673047445536174793190298260014970e-01f),
(float2)(9.7570213003852857003295184767921455e-01f, -2.1910124015686979759287567048886558e-01f),
(float2)(9.9518472667219692873175063141388819e-01f, -9.8017140329560603628777926132897846e-02f),
(float2)(9.8078528040323043057924223830923438e-01f, -1.9509032201612824808378832130983938e-01f),
(float2)(9.5694033573220882438192802510457113e-01f, -2.9028467725446233105301985233381856e-01f),
(float2)(9.9247953459870996706371215623221360e-01f, -1.2241067519921619566325432515441207e-01f),
(float2)(9.7003125319454397423868385885725729e-01f, -2.4298017990326387094413007616822142e-01f),
(float2)(9.3299279883473895669254716267460026e-01f, -3.5989503653498811086564046490821056e-01f),
(float2)(9.8917650996478101443898367506335489e-01f, -1.4673047445536174793190298260014970e-01f),
(float2)(9.5694033573220882438192802510457113e-01f, -2.9028467725446233105301985233381856e-01f),
(float2)(9.0398929312344333819595476597896777e-01f, -4.2755509343028208490977704059332609e-01f),
(float2)(9.8527764238894122161838140527834184e-01f, -1.7096188876030121717164433903235476e-01f),
(float2)(9.4154406518302080630888895029784180e-01f, -3.3688985339222005110926261295389850e-01f),
(float2)(8.7008699110871146054080327303381637e-01f, -4.9289819222978403789880985641502775e-01f),
(float2)(9.8078528040323043057924223830923438e-01f, -1.9509032201612824808378832130983938e-01f),
(float2)(9.2387953251128673848313610506011173e-01f, -3.8268343236508978177923268049198668e-01f),
(float2)(8.3146961230254523567140267914510332e-01f, -5.5557023301960217764872140833176672e-01f),
(float2)(9.7570213003852857003295184767921455e-01f, -2.1910124015686979759287567048886558e-01f),
(float2)(9.0398929312344333819595476597896777e-01f, -4.2755509343028208490977704059332609e-01f),
(float2)(7.8834642762660622761217155129997991e-01f, -6.1523159058062681925349579614703543e-01f),
(float2)(9.7003125319454397423868385885725729e-01f, -2.4298017990326387094413007616822142e-01f),
(float2)(8.8192126434835504955600526955095120e-01f, -4.7139673682599764203970948983624112e-01f),
(float2)(7.4095112535495921690653631230816245e-01f, -6.7155895484701833009211213720845990e-01f),
(float2)(9.6377606579543984022251379428780638e-01f, -2.6671275747489836538406393628974911e-01f),
(float2)(8.5772861000027211808571792062139139e-01f, -5.1410274419322166128409890006878413e-01f),
(float2)(6.8954054473706705152835638727992773e-01f, -7.2424708295146689174259790888754651e-01f),
(float2)(9.5694033573220882438192802510457113e-01f, -2.9028467725446233105301985233381856e-01f),
(float2)(8.3146961230254523567140267914510332e-01f, -5.5557023301960217764872140833176672e-01f),
(float2)(6.3439328416364548779426968394545838e-01f, -7.7301045336273688235451118089258671e-01f),
(float2)(9.4952818059303667475035126699367538e-01f, -3.1368174039889151760718277728301473e-01f),
(float2)(8.0320753148064494286728631777805276e-01f, -5.9569930449243335690567846540943719e-01f),
(float2)(5.7580819141784522763316545024281368e-01f, -8.1758481315158371138807069655740634e-01f),
(float2)(9.4154406518302080630888895029784180e-01f, -3.3688985339222005110926261295389850e-01f),
(float2)(7.7301045336273699337681364340824075e-01f, -6.3439328416364548779426968394545838e-01f),
(float2)(5.1410274419322166128409890006878413e-01f, -8.5772861000027211808571792062139139e-01f),
(float2)(9.3299279883473895669254716267460026e-01f, -3.5989503653498811086564046490821056e-01f),
(float2)(7.4095112535495921690653631230816245e-01f, -6.7155895484701833009211213720845990e-01f),
(float2)(4.4961132965460681720770708125201054e-01f, -8.9322430119551521343623790016863495e-01f),
(float2)(9.2387953251128673848313610506011173e-01f, -3.8268343236508978177923268049198668e-01f),
(float2)(7.0710678118654757273731092936941423e-01f, -7.0710678118654757273731092936941423e-01f),
(float2)(3.8268343236508983729038391174981371e-01f, -9.2387953251128673848313610506011173e-01f),
(float2)(9.1420975570353069095119735720800236e-01f, -4.0524131400498986099734111121506430e-01f),
(float2)(6.7155895484701833009211213720845990e-01f, -7.4095112535495910588423384979250841e-01f),
(float2)(3.1368174039889135107372908350953367e-01f, -9.4952818059303667475035126699367538e-01f),
(float2)(9.0398929312344333819595476597896777e-01f, -4.2755509343028208490977704059332609e-01f),
(float2)(6.3439328416364548779426968394545838e-01f, -7.7301045336273688235451118089258671e-01f),
(float2)(2.4298017990326398196643253868387546e-01f, -9.7003125319454397423868385885725729e-01f),
(float2)(8.9322430119551532445854036268428899e-01f, -4.4961132965460653965195092496287543e-01f),
(float2)(5.9569930449243346792798092792509124e-01f, -8.0320753148064483184498385526239872e-01f),
(float2)(1.7096188876030135594952241717692232e-01f, -9.8527764238894122161838140527834184e-01f),
(float2)(8.8192126434835504955600526955095120e-01f, -4.7139673682599764203970948983624112e-01f),
(float2)(5.5557023301960228867102387084742077e-01f, -8.3146961230254523567140267914510332e-01f),
(float2)(9.8017140329560770162231619906378910e-02f, -9.9518472667219681770944816889823414e-01f),
(float2)(8.7008699110871146054080327303381637e-01f, -4.9289819222978403789880985641502775e-01f),
(float2)(5.1410274419322166128409890006878413e-01f, -8.5772861000027211808571792062139139e-01f),
(float2)(2.4541228522912263837474355909762380e-02f, -9.9969881869620424996725205346592702e-01f),
(float2)(8.5772861000027211808571792062139139e-01f, -5.1410274419322166128409890006878413e-01f),
(float2)(4.7139673682599780857316318360972218e-01f, -8.8192126434835493853370280703529716e-01f),
(float2)(-4.9067674327417785951066520055974252e-02f, -9.9879545620517240500646494183456525e-01f),
(float2)(8.4485356524970711689093150198459625e-01f, -5.3499761988709715332390715047949925e-01f),
(float2)(4.2755509343028219593207950310898013e-01f, -9.0398929312344333819595476597896777e-01f),
(float2)(-1.2241067519921615402989090171104181e-01f, -9.9247953459870996706371215623221360e-01f),
(float2)(8.3146961230254523567140267914510332e-01f, -5.5557023301960217764872140833176672e-01f),
(float2)(3.8268343236508983729038391174981371e-01f, -9.2387953251128673848313610506011173e-01f),
(float2)(-1.9509032201612819257263709005201235e-01f, -9.8078528040323043057924223830923438e-01f),
(float2)(8.1758481315158371138807069655740634e-01f, -5.7580819141784533865546791275846772e-01f),
(float2)(3.3688985339222005110926261295389850e-01f, -9.4154406518302080630888895029784180e-01f),
(float2)(-2.6671275747489830987291270503192209e-01f, -9.6377606579543984022251379428780638e-01f),
(float2)(8.0320753148064494286728631777805276e-01f, -5.9569930449243335690567846540943719e-01f),
(float2)(2.9028467725446233105301985233381856e-01f, -9.5694033573220893540423048762022518e-01f),
(float2)(-3.3688985339222016213156507546955254e-01f, -9.4154406518302069528658648778218776e-01f),
(float2)(7.8834642762660622761217155129997991e-01f, -6.1523159058062681925349579614703543e-01f),
(float2)(2.4298017990326398196643253868387546e-01f, -9.7003125319454397423868385885725729e-01f),
(float2)(-4.0524131400498974997503864869941026e-01f, -9.1420975570353069095119735720800236e-01f),
(float2)(7.7301045336273699337681364340824075e-01f, -6.3439328416364548779426968394545838e-01f),
(float2)(1.9509032201612833135051516819657991e-01f, -9.8078528040323043057924223830923438e-01f),
(float2)(-4.7139673682599769755086072109406814e-01f, -8.8192126434835504955600526955095120e-01f),
(float2)(7.5720884650648456748456283094128594e-01f, -6.5317284295377675551463880765368231e-01f),
(float2)(1.4673047445536174793190298260014970e-01f, -9.8917650996478101443898367506335489e-01f),
(float2)(-5.3499761988709704230160468796384521e-01f, -8.4485356524970722791323396450025029e-01f),
(float2)(7.4095112535495921690653631230816245e-01f, -6.7155895484701833009211213720845990e-01f),
(float2)(9.8017140329560770162231619906378910e-02f, -9.9518472667219681770944816889823414e-01f),
(float2)(-5.9569930449243291281646861534682103e-01f, -8.0320753148064516491189124280936085e-01f),
(float2)(7.2424708295146700276490037140320055e-01f, -6.8954054473706682948375146224861965e-01f),
(float2)(4.9067674327418125956867811510164756e-02f, -9.9879545620517240500646494183456525e-01f),
(float2)(-6.5317284295377653347003388262237422e-01f, -7.5720884650648467850686529345693998e-01f),
(float2)(7.0710678118654757273731092936941423e-01f, -7.0710678118654757273731092936941423e-01f),
(float2)(6.1232339957367660358688201472919830e-17f, -1.0000000000000000000000000000000000e+00f),
(float2)(-7.0710678118654746171500846685376018e-01f, -7.0710678118654757273731092936941423e-01f),
(float2)(6.8954054473706694050605392476427369e-01f, -7.2424708295146689174259790888754651e-01f),
(float2)(-4.9067674327418007995671445087282336e-02f, -9.9879545620517240500646494183456525e-01f),
(float2)(-7.5720884650648467850686529345693998e-01f, -6.5317284295377664449233634513802826e-01f),
(float2)(6.7155895484701833009211213720845990e-01f, -7.4095112535495910588423384979250841e-01f),
(float2)(-9.8017140329560645262141349576268112e-02f, -9.9518472667219692873175063141388819e-01f),
(float2)(-8.0320753148064505388958878029370680e-01f, -5.9569930449243313486107354037812911e-01f),
(float2)(6.5317284295377686653694127016933635e-01f, -7.5720884650648456748456283094128594e-01f),
(float2)(-1.4673047445536163690960052008449566e-01f, -9.8917650996478101443898367506335489e-01f),
(float2)(-8.4485356524970711689093150198459625e-01f, -5.3499761988709715332390715047949925e-01f),
(float2)(6.3439328416364548779426968394545838e-01f, -7.7301045336273688235451118089258671e-01f),
(float2)(-1.9509032201612819257263709005201235e-01f, -9.8078528040323043057924223830923438e-01f),
(float2)(-8.8192126434835493853370280703529716e-01f, -4.7139673682599780857316318360972218e-01f),
(float2)(6.1523159058062681925349579614703543e-01f, -7.8834642762660622761217155129997991e-01f),
(float2)(-2.4298017990326387094413007616822142e-01f, -9.7003125319454397423868385885725729e-01f),
(float2)(-9.1420975570353069095119735720800236e-01f, -4.0524131400498991650849234247289132e-01f),
(float2)(5.9569930449243346792798092792509124e-01f, -8.0320753148064483184498385526239872e-01f),
(float2)(-2.9028467725446216451956615856033750e-01f, -9.5694033573220893540423048762022518e-01f),
(float2)(-9.4154406518302069528658648778218776e-01f, -3.3688985339222032866501876924303360e-01f),
(float2)(5.7580819141784533865546791275846772e-01f, -8.1758481315158371138807069655740634e-01f),
(float2)(-3.3688985339221994008696015043824445e-01f, -9.4154406518302080630888895029784180e-01f),
(float2)(-9.6377606579543984022251379428780638e-01f, -2.6671275747489847640636639880540315e-01f),
(float2)(5.5557023301960228867102387084742077e-01f, -8.3146961230254523567140267914510332e-01f),
(float2)(-3.8268343236508972626808144923415966e-01f, -9.2387953251128673848313610506011173e-01f),
(float2)(-9.8078528040323043057924223830923438e-01f, -1.9509032201612860890627132448571501e-01f),
(float2)(5.3499761988709726434620961299515329e-01f, -8.4485356524970700586862903946894221e-01f),
(float2)(-4.2755509343028186286517211556201801e-01f, -9.0398929312344344921825722849462181e-01f),
(float2)(-9.9247953459870996706371215623221360e-01f, -1.2241067519921677853034225336159579e-01f),
(float2)(5.1410274419322166128409890006878413e-01f, -8.5772861000027211808571792062139139e-01f),
(float2)(-4.7139673682599769755086072109406814e-01f, -8.8192126434835504955600526955095120e-01f),
(float2)(-9.9879545620517240500646494183456525e-01f, -4.9067674327417966362308021643912070e-02f),
(float2)(4.9289819222978409340996108767285477e-01f, -8.7008699110871134951850081051816233e-01f),
(float2)(-5.1410274419322155026179643755313009e-01f, -8.5772861000027211808571792062139139e-01f),
(float2)(-9.9969881869620424996725205346592702e-01f, 2.4541228522912079956785902368210373e-02f),
(float2)(4.7139673682599780857316318360972218e-01f, -8.8192126434835493853370280703529716e-01f),
(float2)(-5.5557023301960195560411648330045864e-01f, -8.3146961230254534669370514166075736e-01f),
(float2)(-9.9518472667219692873175063141388819e-01f, 9.8017140329560145661780268255824922e-02f),
(float2)(4.4961132965460659516310215622070245e-01f, -8.9322430119551532445854036268428899e-01f),
(float2)(-5.9569930449243335690567846540943719e-01f, -8.0320753148064494286728631777805276e-01f),
(float2)(-9.8527764238894122161838140527834184e-01f, 1.7096188876030141146067364843474934e-01f),
(float2)(4.2755509343028219593207950310898013e-01f, -9.0398929312344333819595476597896777e-01f),
(float2)(-6.3439328416364537677196722142980434e-01f, -7.7301045336273710439911610592389479e-01f),
(float2)(-9.7003125319454397423868385885725729e-01f, 2.4298017990326381543297884491039440e-01f),
(float2)(4.0524131400498986099734111121506430e-01f, -9.1420975570353069095119735720800236e-01f),
(float2)(-6.7155895484701844111441459972411394e-01f, -7.4095112535495899486193138727685437e-01f),
(float2)(-9.4952818059303667475035126699367538e-01f, 3.1368174039889157311833400854084175e-01f),
(float2)(3.8268343236508983729038391174981371e-01f, -9.2387953251128673848313610506011173e-01f),
(float2)(-7.0710678118654746171500846685376018e-01f, -7.0710678118654757273731092936941423e-01f),
(float2)(-9.2387953251128684950543856757576577e-01f, 3.8268343236508967075693021797633264e-01f),
(float2)(3.5989503653498827739909415868169162e-01f, -9.3299279883473884567024470015894622e-01f),
(float2)(-7.4095112535495888383962892476120032e-01f, -6.7155895484701855213671706223976798e-01f),
(float2)(-8.9322430119551554650314528771559708e-01f, 4.4961132965460626209619476867374033e-01f),
(float2)(3.3688985339222005110926261295389850e-01f, -9.4154406518302080630888895029784180e-01f),
(float2)(-7.7301045336273699337681364340824075e-01f, -6.3439328416364548779426968394545838e-01f),
(float2)(-8.5772861000027211808571792062139139e-01f, 5.1410274419322155026179643755313009e-01f),
(float2)(3.1368174039889157311833400854084175e-01f, -9.4952818059303667475035126699367538e-01f),
(float2)(-8.0320753148064483184498385526239872e-01f, -5.9569930449243346792798092792509124e-01f),
(float2)(-8.1758481315158393343267562158871442e-01f, 5.7580819141784489456625806269585155e-01f),
(float2)(2.9028467725446233105301985233381856e-01f, -9.5694033573220893540423048762022518e-01f),
(float2)(-8.3146961230254534669370514166075736e-01f, -5.5557023301960217764872140833176672e-01f),
(float2)(-7.7301045336273688235451118089258671e-01f, 6.3439328416364559881657214646111242e-01f),
(float2)(2.6671275747489842089521516754757613e-01f, -9.6377606579543984022251379428780638e-01f),
(float2)(-8.5772861000027200706341545810573734e-01f, -5.1410274419322177230640136258443817e-01f),
(float2)(-7.2424708295146700276490037140320055e-01f, 6.8954054473706682948375146224861965e-01f),
(float2)(2.4298017990326398196643253868387546e-01f, -9.7003125319454397423868385885725729e-01f),
(float2)(-8.8192126434835493853370280703529716e-01f, -4.7139673682599780857316318360972218e-01f),
(float2)(-6.7155895484701866315901952475542203e-01f, 7.4095112535495888383962892476120032e-01f),
(float2)(2.1910124015686976983730005485995207e-01f, -9.7570213003852857003295184767921455e-01f),
(float2)(-9.0398929312344333819595476597896777e-01f, -4.2755509343028202939862580933549907e-01f),
(float2)(-6.1523159058062659720889087111572735e-01f, 7.8834642762660644965677647633128799e-01f),
(float2)(1.9509032201612833135051516819657991e-01f, -9.8078528040323043057924223830923438e-01f),
(float2)(-9.2387953251128673848313610506011173e-01f, -3.8268343236508989280153514300764073e-01f),
(float2)(-5.5557023301960217764872140833176672e-01f, 8.3146961230254523567140267914510332e-01f),
(float2)(1.7096188876030135594952241717692232e-01f, -9.8527764238894122161838140527834184e-01f),
(float2)(-9.4154406518302069528658648778218776e-01f, -3.3688985339222032866501876924303360e-01f),
(float2)(-4.9289819222978420443226355018850882e-01f, 8.7008699110871134951850081051816233e-01f),
(float2)(1.4673047445536174793190298260014970e-01f, -9.8917650996478101443898367506335489e-01f),
(float2)(-9.5694033573220882438192802510457113e-01f, -2.9028467725446238656417108359164558e-01f),
(float2)(-4.2755509343028247348783565939811524e-01f, 9.0398929312344311615134984094765969e-01f),
(float2)(1.2241067519921627892998117204115260e-01f, -9.9247953459870996706371215623221360e-01f),
(float2)(-9.7003125319454397423868385885725729e-01f, -2.4298017990326406523315938557061600e-01f),
(float2)(-3.5989503653498794433218677113472950e-01f, 9.3299279883473895669254716267460026e-01f),
(float2)(9.8017140329560770162231619906378910e-02f, -9.9518472667219681770944816889823414e-01f),
(float2)(-9.8078528040323043057924223830923438e-01f, -1.9509032201612860890627132448571501e-01f),
(float2)(-2.9028467725446327474259078371687792e-01f, 9.5694033573220860233732310007326305e-01f),
(float2)(7.3564563599667454063535387831507251e-02f, -9.9729045667869020697082760307239369e-01f),
(float2)(-9.8917650996478101443898367506335489e-01f, -1.4673047445536180344305421385797672e-01f),
(float2)(-2.1910124015687010290420744240691420e-01f, 9.7570213003852845901064938516356051e-01f),
(float2)(4.9067674327418125956867811510164756e-02f, -9.9879545620517240500646494183456525e-01f),
(float2)(-9.9518472667219681770944816889823414e-01f, -9.8017140329560825673382851164205931e-02f),
(float2)(-1.4673047445536230304341529517841991e-01f, 9.8917650996478090341668121254770085e-01f),
(float2)(2.4541228522912263837474355909762380e-02f, -9.9969881869620424996725205346592702e-01f),
(float2)(-9.9879545620517240500646494183456525e-01f, -4.9067674327417966362308021643912070e-02f),
(float2)(-7.3564563599667356919020733130309964e-02f, 9.9729045667869020697082760307239369e-01f),
};


/* Scalar floating-point type selected for this kernel (single precision). */
#define fptype float

/* Two-component vector type; the butterflies use it to build float2 values. */
#define fvect2 float2

/* Single-precision literal close to sqrt(2)/2; not referenced in the visible code. */
#define C8Q 0.70710678118654752440084436210485f

/*
 * FwdRad4B1 - in-place four-point butterfly on four float2 values
 * (presumably the forward radix-4 FFT butterfly, going by the name).
 *
 * Mind the parameter order in the signature (R0, R2, R1, R3): the second
 * argument is bound to the name R2 and the third to the name R1.
 *
 * With a, b, c, d the incoming values of *R0, *R1, *R2, *R3 (by parameter
 * name) and rot(v) = (-v.y, v.x), the results in exact arithmetic are:
 *     *R0 = (a + b) + (c + d)
 *     *R1 = (a + b) - (c + d)
 *     *R2 = (a - b) - rot(c - d)
 *     *R3 = (a - b) + rot(c - d)
 * Each sum is formed as 2*x - difference, so the rounded floating-point
 * result may differ slightly from a direct addition.
 */
__attribute__((always_inline)) void
FwdRad4B1(float2 *R0, float2 *R2, float2 *R1, float2 *R3)
{
    /* Layer 1: pairwise difference first, then the sum as 2*x - difference. */
    *R1 = *R0 - *R1;
    *R0 = 2.0f * (*R0) - *R1;
    *R3 = *R2 - *R3;
    *R2 = 2.0f * (*R2) - *R3;

    /* Layer 2: combine the two pairs; the R3 term is rotated to (-y, x). */
    *R2 = *R0 - *R2;
    *R0 = 2.0f * (*R0) - *R2;
    *R3 = *R1 + (float2)(-(*R3).y, (*R3).x);
    *R1 = 2.0f * (*R1) - *R3;

    /* Exchange the values held in *R1 and *R2. */
    float2 held = *R1;
    *R1 = *R2;
    *R2 = held;
}

/*
 * InvRad4B1 - in-place four-point butterfly on four float2 values
 * (presumably the inverse radix-4 FFT butterfly, going by the name).
 *
 * Mind the parameter order in the signature (R0, R2, R1, R3): the second
 * argument is bound to the name R2 and the third to the name R1.
 *
 * With a, b, c, d the incoming values of *R0, *R1, *R2, *R3 (by parameter
 * name) and rot(v) = (v.y, -v.x), the results in exact arithmetic are:
 *     *R0 = (a + b) + (c + d)
 *     *R1 = (a + b) - (c + d)
 *     *R2 = (a - b) - rot(c - d)
 *     *R3 = (a - b) + rot(c - d)
 * The only difference from FwdRad4B1 is the direction of the rotation.
 */
__attribute__((always_inline)) void
InvRad4B1(float2 *R0, float2 *R2, float2 *R1, float2 *R3)
{
    /* Layer 1: pairwise difference first, then the sum as 2*x - difference. */
    *R1 = *R0 - *R1;
    *R0 = 2.0f * (*R0) - *R1;
    *R3 = *R2 - *R3;
    *R2 = 2.0f * (*R2) - *R3;

    /* Layer 2: combine the two pairs; the R3 term is rotated to (y, -x). */
    *R2 = *R0 - *R2;
    *R0 = 2.0f * (*R0) - *R2;
    *R3 = *R1 + (float2)((*R3).y, -(*R3).x);
    *R1 = 2.0f * (*R1) - *R3;

    /* Exchange the values held in *R1 and *R2. */
    float2 held = *R1;
    *R1 = *R2;
    *R2 = held;
}

/*
 * FwdPass0 - first pass: load inputs, run one butterfly, then redistribute
 * the results between work-items through local memory.
 *
 * rw         0: no buffer access at all (the .y components are still zeroed);
 *            non-zero: real parts (.x) are read from bufIn and all local
 *            stores/loads are performed;
 *            greater than 1: imaginary parts (.y) are additionally read from
 *            bufIn2, otherwise they are set to zero.
 * b          not used by this function.
 * me         per-work-item index used for all addressing
 *            (presumably the local id - confirm against the caller).
 * inOffset   element offset applied to bufIn / bufIn2.
 * outOffset  element offset applied to bufOutRe / bufOutIm.
 * R0..R3     the four values owned by this work-item, updated in place.
 *
 * Inputs are read at inOffset + me + {0, 64, 128, 192}. After the butterfly
 * each work-item writes its four results to consecutive slots
 * outOffset + 4*me + {0..3} and reads back outOffset + me + {0, 64, 128, 192},
 * first for the .x components via bufOutRe, then for .y via bufOutIm.
 * All four barriers are executed regardless of rw.
 */
__attribute__((always_inline)) void
FwdPass0(uint rw, uint b, uint me, uint inOffset, uint outOffset, __global float *bufIn, __global float *bufIn2, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3)
{
    const uint src     = inOffset + me;        /* first input element        */
    const uint scatter = outOffset + me * 4;   /* first slot written locally */
    const uint gather  = outOffset + me;       /* first slot read back       */

    /* Real parts. */
    if (rw) {
        (*R0).x = bufIn[src];
        (*R1).x = bufIn[src + 64];
        (*R2).x = bufIn[src + 128];
        (*R3).x = bufIn[src + 192];
    }

    /* Imaginary parts: from the second buffer when present, else zero. */
    if (rw > 1) {
        (*R0).y = bufIn2[src];
        (*R1).y = bufIn2[src + 64];
        (*R2).y = bufIn2[src + 128];
        (*R3).y = bufIn2[src + 192];
    } else {
        (*R0).y = 0.0f;
        (*R1).y = 0.0f;
        (*R2).y = 0.0f;
        (*R3).y = 0.0f;
    }

    FwdRad4B1(R0, R1, R2, R3);

    /* Exchange the .x components through bufOutRe. */
    if (rw) {
        bufOutRe[scatter]     = (*R0).x;
        bufOutRe[scatter + 1] = (*R1).x;
        bufOutRe[scatter + 2] = (*R2).x;
        bufOutRe[scatter + 3] = (*R3).x;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    if (rw) {
        (*R0).x = bufOutRe[gather];
        (*R1).x = bufOutRe[gather + 64];
        (*R2).x = bufOutRe[gather + 128];
        (*R3).x = bufOutRe[gather + 192];
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    /* Exchange the .y components through bufOutIm. */
    if (rw) {
        bufOutIm[scatter]     = (*R0).y;
        bufOutIm[scatter + 1] = (*R1).y;
        bufOutIm[scatter + 2] = (*R2).y;
        bufOutIm[scatter + 3] = (*R3).y;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    if (rw) {
        (*R0).y = bufOutIm[gather];
        (*R1).y = bufOutIm[gather + 64];
        (*R2).y = bufOutIm[gather + 128];
        (*R3).y = bufOutIm[gather + 192];
    }

    barrier(CLK_LOCAL_MEM_FENCE);
}

/*
 * FwdPass1 - second pass: twiddle multiply, butterfly, local-memory exchange.
 *
 * rw         non-zero enables the local-memory stores and loads; the twiddle
 *            multiply and the butterfly run regardless.
 * b, inOffset, bufInRe, bufInIm
 *            not used by this function; the inputs are the values already
 *            held in *R0..*R3.
 * me         per-work-item index used for addressing and twiddle selection
 *            (presumably the local id - confirm against the caller).
 * outOffset  element offset applied to bufOutRe / bufOutIm.
 * R0..R3     the four values owned by this work-item, updated in place.
 *
 * *R1, *R2 and *R3 are multiplied, as complex numbers (x real, y imaginary),
 * by twiddles[3 + 3*(me % 4) + {0, 1, 2}]. After the butterfly the results
 * are written to outOffset + (me / 4)*16 + me % 4 + {0, 4, 8, 12} and read
 * back from outOffset + me + {0, 64, 128, 192}, first the .x components via
 * bufOutRe, then .y via bufOutIm. All four barriers are executed regardless
 * of rw.
 */
__attribute__((always_inline)) void
FwdPass1(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3)
{
    const uint tw      = 3 + 3 * (me % 4);                   /* first twiddle index        */
    const uint scatter = outOffset + (me / 4) * 16 + me % 4; /* first slot written locally */
    const uint gather  = outOffset + me;                     /* first slot read back       */

    /* Complex multiply of *R1 by its twiddle factor. */
    {
        const float2 w = twiddles[tw];
        const float re = (w.x * (*R1).x) - (w.y * (*R1).y);
        const float im = (w.y * (*R1).x) + (w.x * (*R1).y);
        (*R1).x = re;
        (*R1).y = im;
    }

    /* Complex multiply of *R2 by its twiddle factor. */
    {
        const float2 w = twiddles[tw + 1];
        const float re = (w.x * (*R2).x) - (w.y * (*R2).y);
        const float im = (w.y * (*R2).x) + (w.x * (*R2).y);
        (*R2).x = re;
        (*R2).y = im;
    }

    /* Complex multiply of *R3 by its twiddle factor. */
    {
        const float2 w = twiddles[tw + 2];
        const float re = (w.x * (*R3).x) - (w.y * (*R3).y);
        const float im = (w.y * (*R3).x) + (w.x * (*R3).y);
        (*R3).x = re;
        (*R3).y = im;
    }

    FwdRad4B1(R0, R1, R2, R3);

    /* Exchange the .x components through bufOutRe. */
    if (rw) {
        bufOutRe[scatter]      = (*R0).x;
        bufOutRe[scatter + 4]  = (*R1).x;
        bufOutRe[scatter + 8]  = (*R2).x;
        bufOutRe[scatter + 12] = (*R3).x;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    if (rw) {
        (*R0).x = bufOutRe[gather];
        (*R1).x = bufOutRe[gather + 64];
        (*R2).x = bufOutRe[gather + 128];
        (*R3).x = bufOutRe[gather + 192];
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    /* Exchange the .y components through bufOutIm. */
    if (rw) {
        bufOutIm[scatter]      = (*R0).y;
        bufOutIm[scatter + 4]  = (*R1).y;
        bufOutIm[scatter + 8]  = (*R2).y;
        bufOutIm[scatter + 12] = (*R3).y;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    if (rw) {
        (*R0).y = bufOutIm[gather];
        (*R1).y = bufOutIm[gather + 64];
        (*R2).y = bufOutIm[gather + 128];
        (*R3).y = bufOutIm[gather + 192];
    }

    barrier(CLK_LOCAL_MEM_FENCE);
}

/*
 * FwdPass2 - third pass: twiddle multiply, butterfly, local-memory exchange.
 *
 * rw         non-zero enables the local-memory stores and loads; the twiddle
 *            multiply and the butterfly run regardless.
 * b, inOffset, bufInRe, bufInIm
 *            not used by this function; the inputs are the values already
 *            held in *R0..*R3.
 * me         per-work-item index used for addressing and twiddle selection
 *            (presumably the local id - confirm against the caller).
 * outOffset  element offset applied to bufOutRe / bufOutIm.
 * R0..R3     the four values owned by this work-item, updated in place.
 *
 * *R1, *R2 and *R3 are multiplied, as complex numbers (x real, y imaginary),
 * by twiddles[15 + 3*(me % 16) + {0, 1, 2}]. After the butterfly the results
 * are written to outOffset + (me / 16)*64 + me % 16 + {0, 16, 32, 48} and
 * read back from outOffset + me + {0, 64, 128, 192}, first the .x components
 * via bufOutRe, then .y via bufOutIm. All four barriers are executed
 * regardless of rw.
 */
__attribute__((always_inline)) void
FwdPass2(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __local float *bufOutRe, __local float *bufOutIm, float2 *R0, float2 *R1, float2 *R2, float2 *R3)
{
    const uint tw      = 15 + 3 * (me % 16);                   /* first twiddle index        */
    const uint scatter = outOffset + (me / 16) * 64 + me % 16; /* first slot written locally */
    const uint gather  = outOffset + me;                       /* first slot read back       */

    /* Complex multiply of *R1 by its twiddle factor. */
    {
        const float2 w = twiddles[tw];
        const float re = (w.x * (*R1).x) - (w.y * (*R1).y);
        const float im = (w.y * (*R1).x) + (w.x * (*R1).y);
        (*R1).x = re;
        (*R1).y = im;
    }

    /* Complex multiply of *R2 by its twiddle factor. */
    {
        const float2 w = twiddles[tw + 1];
        const float re = (w.x * (*R2).x) - (w.y * (*R2).y);
        const float im = (w.y * (*R2).x) + (w.x * (*R2).y);
        (*R2).x = re;
        (*R2).y = im;
    }

    /* Complex multiply of *R3 by its twiddle factor. */
    {
        const float2 w = twiddles[tw + 2];
        const float re = (w.x * (*R3).x) - (w.y * (*R3).y);
        const float im = (w.y * (*R3).x) + (w.x * (*R3).y);
        (*R3).x = re;
        (*R3).y = im;
    }

    FwdRad4B1(R0, R1, R2, R3);

    /* Exchange the .x components through bufOutRe. */
    if (rw) {
        bufOutRe[scatter]      = (*R0).x;
        bufOutRe[scatter + 16] = (*R1).x;
        bufOutRe[scatter + 32] = (*R2).x;
        bufOutRe[scatter + 48] = (*R3).x;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    if (rw) {
        (*R0).x = bufOutRe[gather];
        (*R1).x = bufOutRe[gather + 64];
        (*R2).x = bufOutRe[gather + 128];
        (*R3).x = bufOutRe[gather + 192];
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    /* Exchange the .y components through bufOutIm. */
    if (rw) {
        bufOutIm[scatter]      = (*R0).y;
        bufOutIm[scatter + 16] = (*R1).y;
        bufOutIm[scatter + 32] = (*R2).y;
        bufOutIm[scatter + 48] = (*R3).y;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    if (rw) {
        (*R0).y = bufOutIm[gather];
        (*R1).y = bufOutIm[gather + 64];
        (*R2).y = bufOutIm[gather + 128];
        (*R3).y = bufOutIm[gather + 192];
    }

    barrier(CLK_LOCAL_MEM_FENCE);
}

/*
 * FwdPass3 -- the last pass invoked by the fft_fwd kernel; writes the results
 * to global memory.
 *
 * Work performed, in order:
 *   1. R1, R2 and R3 are each multiplied (as complex numbers, x = real,
 *      y = imaginary) by twiddles[63 + 3*(me % 64) + k] for k = 0, 1, 2.
 *   2. FwdRad4B1 is applied to R0..R3 (defined elsewhere in this file;
 *      presumably the radix-4 butterfly -- confirm at its definition).
 *   3. The x components are stored to bufInRe at inOffset + me + {0, 64, 128, 192}
 *      and, after a barrier, re-read from indices me+1, me+65, 256-(me+1) and
 *      256-(me+65). Work-item 0 also copies element 0 to bufOut[outOffset]
 *      with a zero imaginary part when rw is non-zero.
 *   4. The same is done for the y components through bufInIm, with element 0
 *      going to bufOut2[outOffset] when rw > 1.
 *   5. Sums and differences of the re-read pairs (R0 with R2, R1 with R3) are
 *      halved and written to bufOut (rw != 0) and bufOut2 (rw > 1) at
 *      outOffset + me + 1 and outOffset + me + 65.
 *      NOTE(review): this looks like the usual separation of two real
 *      transforms that were computed as one complex transform -- confirm
 *      against the generator.
 *
 * The local-memory stores/loads and the barriers are executed by every
 * work-item regardless of rw; only global writes are gated. bufInRe and
 * bufInIm may refer to the same buffer (the kernel in this file passes the
 * same array for both), which is why a barrier separates the real read-back
 * from the imaginary store.
 *
 * b is not referenced by this pass.
 */
__attribute__((always_inline)) void
FwdPass3(uint rw, uint b, uint me, uint inOffset, uint outOffset, __local float *bufInRe, __local float *bufInIm, __global float2 *bufOut, __global float2 *bufOut2, float2 *R0, float2 *R1, float2 *R2, float2 *R3)
{
    /* Index of the first of the three consecutive twiddles for this work-item. */
    const uint twBase = 63 + 3*(me % 64);

    {
        float2 W = twiddles[twBase + 0];
        float TR, TI;
        TR = (W.x * (*R1).x) - (W.y * (*R1).y);
        TI = (W.y * (*R1).x) + (W.x * (*R1).y);
        (*R1).x = TR;
        (*R1).y = TI;
    }

    {
        float2 W = twiddles[twBase + 1];
        float TR, TI;
        TR = (W.x * (*R2).x) - (W.y * (*R2).y);
        TI = (W.y * (*R2).x) + (W.x * (*R2).y);
        (*R2).x = TR;
        (*R2).y = TI;
    }

    {
        float2 W = twiddles[twBase + 2];
        float TR, TI;
        TR = (W.x * (*R3).x) - (W.y * (*R3).y);
        TI = (W.y * (*R3).x) + (W.x * (*R3).y);
        (*R3).x = TR;
        (*R3).y = TI;
    }

    FwdRad4B1(R0, R1, R2, R3);

    /* Real parts: store with a stride of 64 ... */
    bufInRe[inOffset + me +   0] = (*R0).x;
    bufInRe[inOffset + me +  64] = (*R1).x;
    bufInRe[inOffset + me + 128] = (*R2).x;
    bufInRe[inOffset + me + 192] = (*R3).x;

    barrier(CLK_LOCAL_MEM_FENCE);

    /* ... then fetch elements k and 256-k for k = me+1 and k = me+65. */
    (*R0).x = bufInRe[inOffset + ( me + 1 )];
    (*R1).x = bufInRe[inOffset + ( me + 65 )];
    (*R2).x = bufInRe[inOffset + ( 256 - (me + 1) )];
    (*R3).x = bufInRe[inOffset + ( 256 - (me + 65) )];
    if (rw && !me)
    {
        /* Element 0 is emitted by work-item 0 only, with zero imaginary part. */
        bufOut[outOffset].x = bufInRe[inOffset];
        bufOut[outOffset].y = 0;
    }

    /* Reads above must complete before the buffer is overwritten below. */
    barrier(CLK_LOCAL_MEM_FENCE);

    /* Imaginary parts: same store pattern ... */
    bufInIm[inOffset + me +   0] = (*R0).y;
    bufInIm[inOffset + me +  64] = (*R1).y;
    bufInIm[inOffset + me + 128] = (*R2).y;
    bufInIm[inOffset + me + 192] = (*R3).y;

    barrier(CLK_LOCAL_MEM_FENCE);

    /* ... and same fetch pattern. */
    (*R0).y = bufInIm[inOffset + ( me + 1 )];
    (*R1).y = bufInIm[inOffset + ( me + 65 )];
    (*R2).y = bufInIm[inOffset + ( 256 - (me + 1) )];
    (*R3).y = bufInIm[inOffset + ( 256 - (me + 65) )];
    if ((rw > 1) && !me)
    {
        bufOut2[outOffset].x = bufInIm[inOffset];
        bufOut2[outOffset].y = 0;
    }

    barrier(CLK_LOCAL_MEM_FENCE);

    /*
     * The 0.5f literals keep the combine in single precision. An unsuffixed
     * 0.5 is a double constant in OpenCL C and would promote each product to
     * double before it is narrowed back to float.
     */
    if (rw)
    {
        bufOut[outOffset + ( me + 1 )] = (float2)( ((*R0).x + (*R2).x)*0.5f, ((*R0).y - (*R2).y)*0.5f );
        bufOut[outOffset + ( me + 65 )] = (float2)( ((*R1).x + (*R3).x)*0.5f, ((*R1).y - (*R3).y)*0.5f );
    }

    if (rw > 1)
    {
        bufOut2[outOffset + ( me + 1 )] = (float2)( ((*R0).y + (*R2).y)*0.5f, (-(*R0).x + (*R2).x)*0.5f );
        bufOut2[outOffset + ( me + 65 )] = (float2)( ((*R1).y + (*R3).y)*0.5f, (-(*R1).x + (*R3).x)*0.5f );
    }
}

/*
 * One 32-bit entry of the kernel's constant parameter buffer, readable
 * either as an unsigned (u) or as a signed (i) integer.
 */
typedef union
{
    uint u;
    int i;
} cb_t;

/*
 * fft_fwd -- kernel entry point; runs FwdPass0..FwdPass3 in sequence.
 *
 * Each work-group has 64 work-items and handles two consecutive slices of
 * 256 elements, numbered 2*batch and 2*batch + 1, where batch is the
 * work-group id. Slice s starts at element (s/256)*65536 + (s%256)*256 in
 * both gbIn and gbOut.
 *
 * cb    constant parameter buffer; only cb[0].u is read
 *       (NOTE(review): presumably a batch count -- confirm with the host code).
 * gbIn  input buffer of floats.
 * gbOut output buffer of float2.
 *
 * rw is the flag handed to every pass: 0 when me >= ((remaining+1)/2)*64,
 * otherwise remaining - 2*(me/64), where remaining = 256*cb[0].u - 2*batch.
 * The passes visible in this file test it as "non-zero" (first slice is
 * written) and "> 1" (second slice is written as well).
 *
 * One 256-float local array is shared by all passes and is passed for both
 * the real and the imaginary scratch buffers.
 *
 * NOTE(review): max_constant_size is a vendor attribute rather than standard
 * OpenCL C; a compiler that does not recognise it may report an
 * unknown-attribute warning. It is a candidate source of a bare
 * "1 warning generated" message -- confirm from the full build log.
 */
__kernel __attribute__((reqd_work_group_size (64,1,1)))
void fft_fwd(__constant cb_t *cb __attribute__((max_constant_size(32))), __global float * restrict gbIn, __global float2 * restrict gbOut)
{
    const uint me = get_local_id(0);
    const uint batch = get_group_id(0);

    __local float lds[256];

    /* Slices still outstanding from this work-group's first slice onwards. */
    const uint remaining = (256 * cb[0].u) - batch*2;

    uint rw = 0;
    if (me < ((remaining + 1)/2)*64)
    {
        rw = remaining - 2*(me/64);
    }

    const uint b = 0;

    /* The two slices owned by this work-group and their element offsets. */
    const uint slice0 = batch*2 + 0;
    const uint slice1 = batch*2 + 1;

    const uint iOffset  = (slice0/256)*65536 + (slice0%256)*256;
    const uint oOffset  = (slice0/256)*65536 + (slice0%256)*256;
    const uint iOffset2 = (slice1/256)*65536 + (slice1%256)*256;
    const uint oOffset2 = (slice1/256)*65536 + (slice1%256)*256;

    __global float *lwbIn = gbIn + iOffset;
    __global float *lwbIn2 = gbIn + iOffset2;
    __global float2 *lwbOut = gbOut + oOffset;
    __global float2 *lwbOut2 = gbOut + oOffset2;

    /* Per-work-item working values carried from one pass to the next. */
    float2 R0, R1, R2, R3;

    FwdPass0(rw, b, me, 0, 0, lwbIn, lwbIn2, lds, lds, &R0, &R1, &R2, &R3);
    FwdPass1(rw, b, me, 0, 0, lds, lds, lds, lds, &R0, &R1, &R2, &R3);
    FwdPass2(rw, b, me, 0, 0, lds, lds, lds, lds, &R0, &R1, &R2, &R3);
    FwdPass3(rw, b, me, 0, 0, lds, lds, lwbOut, lwbOut2, &R0, &R1, &R2, &R3);
}

Attachments

Outcomes