4 Replies Latest reply on Jun 16, 2010 5:00 PM by MicahVillmow

    1 Kernel several functions

    opencl_JEDI

      Hi,

      This is my problem: I have 3 functions in my .cl file and I call them from a kernel. They are image-processing functions, so I apply 3 transformations to the image. Right now I'm working in global memory, but I want to work in local memory.

      What I want to do is transfer each work-group's data to local memory, do the 3 operations, then write the result back to global memory. But since each transformation is a separate function, is this possible?

      I can detail more if needed.

        • 1 Kernel several functions
          Illusio

          Sure. You can pass pointers to a local memory buffer to your other functions just fine.

           

            • 1 Kernel several functions
              opencl_JEDI

              I have 2 functions that operate on an image.

              The first transfers work-groups to local memory, then does the computation and writes the result back to global memory; the second takes that global result and processes it. This works just fine.

              Now, when I pass the local buffer of the first function to the second (instead of global buffers), it doesn't work. Below is the pseudo code for the algorithm:

              /*
               * func1 - first stage: read one element from global memory, scale it
               * by A, and park the result in the work-group's local buffer.
               *
               *   datain    : input image in global memory, addressed as
               *               row * width + column using the 2-D global ID.
               *   width     : row pitch of datain, in elements.
               *   height    : not used by this function.
               *   LocalData : local-memory scratch buffer; the slot written is
               *               get_local_id(1) * get_local_size(0) + get_local_id(0),
               *               i.e. one slot per work-item of the work-group.
               *
               * NOTE: A is not defined in this snippet; it has to be defined
               * elsewhere (e.g. with #define) before this compiles.
               */
              inline void func1(__global unsigned int *datain,
                                int width, int height,
                                __local unsigned int *LocalData)
              {
                  /* The stray second comma after `height` in the original
                     parameter list was a syntax error and has been removed. */
                  LocalData[get_local_id(1)*get_local_size(0) + get_local_id(0)] =
                      datain[get_global_id(1)*(width) + get_global_id(0)] * A;
              }

               

              /*
               * func2 - second stage: take this work-item's value out of the local
               * buffer, scale it by B, and store it to global memory.
               *
               *   width     : row pitch of dataout, in elements.
               *   height    : not used by this function.
               *   dataout   : output image in global memory, addressed as
               *               row * width + column using the 2-D global ID.
               *   LocalData : local-memory scratch buffer; the slot read is
               *               get_local_id(1) * get_local_size(0) + get_local_id(0).
               *
               * NOTE: B is not defined in this snippet; it has to be defined
               * elsewhere (e.g. with #define).
               */
              inline void func2( int width, int height,
                                 __global unsigned int *dataout,
                                 __local unsigned int *LocalData)
              {
                  const size_t localSlot  = get_local_id(1)*get_local_size(0) + get_local_id(0);
                  const size_t globalSlot = get_global_id(1)*(width) + get_global_id(0);

                  dataout[globalSlot] = LocalData[localSlot] * B;
              }

               

              /*
               * Main - kernel entry point: runs the two stages back to back, handing
               * the same local-memory buffer to both helper functions.
               *
               *   datain    : input image in global memory.
               *   width     : row pitch, in elements, used to index datain/dataout.
               *   height    : forwarded to the helpers, which do not use it.
               *   dataout   : output image in global memory.
               *   LocalData : local-memory scratch buffer supplied as a kernel
               *               argument. The helpers index it up to
               *               get_local_size(0) * get_local_size(1) - 1, so it must
               *               hold at least one element per work-item of a group.
               *
               * NOTE(review): for a __local kernel argument the host is expected to
               * pass only a size (clSetKernelArg with a NULL arg_value), not a
               * buffer object - confirm against the host code.
               */
              __kernel void Main(__global unsigned int *datain,  int width, int height,
                                         __global unsigned int *dataout,
                                         __local unsigned int *LocalData)

              {

                   /* Stage 1: global -> local (scaled by A). */
                   func1(datain,  width, height, LocalData);

                   /* Stage 2: local -> global (scaled by B). Each work-item reads
                      back only the slot it wrote itself in func1, so no other
                      work-item's data is consumed here. */
                   func2(width, height, dataout, LocalData);

              }

               

              Hope it's clear enough

               

                • 1 Kernel several functions
                  Illusio

                  Well, your code works for me, with the caveat that passing a global buffer as LocalData from host code generates an argument error. If you allocate LocalData statically in the Main kernel, it works fine, though.

                  Here's the code I executed. The host code is C#. Hope that's not too much of a problem.

                  // Host-side test (C#): builds localmem.cl, runs its "Main" kernel with
                  // globalWorkSize = 1024x1024 and localWorkSize = 16x16, and reads the
                  // destination buffer back into outputData.
                  public unsafe void TestLocalMem()
                  {
                      int globalWidth = 1024;
                      int globalHeight = 1024;
                      int localWidth = 16;
                      int localHeight = 16;

                      int[] inputData = new int[globalWidth * globalHeight];
                      int[] outputData = new int[globalWidth * globalHeight];
                      IntPtr[] globalWorkSize = new IntPtr[] { (IntPtr)globalWidth, (IntPtr)globalHeight };
                      IntPtr[] localWorkSize = new IntPtr[] { (IntPtr)localWidth, (IntPtr)localHeight };

                      string source = File.ReadAllText(@"localmem.cl");
                      SimpleOCLHelper socl = new SimpleOCLHelper(OpenCL.GetPlatform(0), DeviceType.GPU, source);
                      Kernel localMemTest = socl.GetKernel("Main");

                      Mem globalSource = socl.Context.CreateBuffer(MemFlags.READ_WRITE, globalWidth * globalHeight * sizeof(int), IntPtr.Zero);
                      Mem globalDestination = socl.Context.CreateBuffer(MemFlags.READ_WRITE, globalWidth * globalHeight * sizeof(int), IntPtr.Zero);
                      // Created but never passed to the kernel below: the kernel takes only
                      // four arguments and declares its local scratch array itself.
                      Mem localBuffer = socl.Context.CreateBuffer(MemFlags.READ_WRITE, localWidth * localHeight * sizeof(int), IntPtr.Zero);

                      // Fill the input with its own index and upload it.
                      for (int i = 0; i < inputData.Length; i++)
                      {
                          inputData[i] = i;
                      }
                      fixed (int* pInputData = inputData)
                      {
                          socl.CQ.EnqueueWriteBuffer(globalSource, true, IntPtr.Zero, (IntPtr)(inputData.Length * sizeof(int)), (IntPtr)pInputData);
                      }

                      // Zero the destination buffer before the run.
                      for (int i = 0; i < outputData.Length; i++)
                      {
                          outputData[i] = 0;
                      }
                      fixed (int* pOutputData = outputData)
                      {
                          socl.CQ.EnqueueWriteBuffer(globalDestination, true, IntPtr.Zero, (IntPtr)(outputData.Length * sizeof(int)), (IntPtr)pOutputData);
                      }

                      // Argument order matches Main(datain, width, height, dataout).
                      localMemTest.SetArg(0, globalSource);
                      localMemTest.SetArg(1, globalWidth);
                      localMemTest.SetArg(2, globalHeight);
                      localMemTest.SetArg(3, globalDestination);

                      socl.CQ.EnqueueNDRangeKernel(localMemTest, 2, null, globalWorkSize, localWorkSize);
                      socl.CQ.Finish();

                      fixed (int* pOutputData = outputData)
                      {
                          socl.CQ.EnqueueReadBuffer(globalDestination, true, IntPtr.Zero, (IntPtr)(outputData.Length * sizeof(int)), (IntPtr)pOutputData);
                      }
                  }
                  ------------- localmem.cl
                  #define A 2
                  #define B 3

                  /* Stage 1: scale this work-item's input element by A and store it in
                     the work-item's slot of the local buffer. height is unused. */
                  inline void func1(global unsigned int *datain, int width, int height,
                                    local unsigned int *LocalData)
                  {
                      LocalData[get_local_id(1)*get_local_size(0)+ get_local_id(0)] =
                          datain[get_global_id(1)*(width) + get_global_id(0)] * A;
                  }

                  /* Stage 2: scale this work-item's local slot by B and write it to the
                     matching position of the output. height is unused. */
                  inline void func2( int width, int height, global unsigned int *dataout,
                                     local unsigned int *LocalData)
                  {
                      dataout[get_global_id(1)*(width) + get_global_id(0)] =
                          LocalData[get_local_id(1)*get_local_size(0)+ get_local_id(0)] * B;
                  }

                  /* Kernel entry point. The local scratch array is declared here with a
                     fixed 16*16 size, which matches the 16x16 localWorkSize used by the
                     host code above, instead of being received as a kernel argument. */
                  kernel void Main(global unsigned int *datain, int width, int height,
                                   global unsigned int *dataout)
                  {
                      local uint LocalData[16*16];
                      func1(datain, width, height, LocalData);
                      func2(width, height, dataout, LocalData);
                  }

              • 1 Kernel several functions
                MicahVillmow
                opencl_Jedi,
                I have no issue executing this kernel on an internal build, after I defined A and B, so the issue seems to be fixed in the next release.