7 Replies Latest reply on Aug 12, 2010 1:32 PM by Hiran47

    OpenCL code results in a 'GPU device not responding' error... Any ideas?

    Hiran47
      Please note that I have used CLOO (a C# extension of OpenCL) to do the following.

      I'm new to OpenCL and am doing research in this area. I have used CLOO (a C# extension of OpenCL by OpenTK) because it fits my preference for working in C#.

      As part of my research project, I'm doing a PI calculation on my GPU using OpenCL (merely to show that the GPU is a better unit for general-purpose calculations). I have an ATI Radeon HD 5770 GPU, and everything goes well for inputs under 1000. But whenever the input is larger than 1000 (say 1500), the display goes blank and the following error appears.


      http://www.opentk.com/files/inline_images/ErrorMsg.jpg


      I tried the same code in an NVIDIA 9800 GPU & the same problem arose for inputs larger than 400.
      The same code runs perfectly for any input when using only the CPU.

      If you have an idea about a way to get rid of this issue, please help me.

      Thanks in advance.

      Here is my Code:
      (FYI: I found the PI calculation code from the CodeProject)

      using System;
      using System.Collections.Generic;
      using System.Linq;
      using System.Text;
      using Cloo;
      using System.Runtime.InteropServices;
      using System.Diagnostics;

      namespace PI_with_Cloo
      {
          /// <summary>
          /// Console program that computes decimal digits of PI on the first GPU of the
          /// first OpenCL platform. Each work-item of the "PI" kernel produces one
          /// 9-digit group, starting at the digit position it reads from its input slot.
          /// </summary>
          class Program
          {
              static ComputePlatform platform = ComputePlatform.Platforms[0];

              // Number of decimal digits produced by one work-item (one kernel result).
              const int DigitsPerGroup = 9;

              #region Kernel Source
              // OpenCL C source. No double quotes may appear inside this verbatim string.
              static string kernelSource = @"
      #pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable

      // (a * b) mod m. The product is formed and reduced in 64 bits and only the
      // reduced value is narrowed to int; casting before the modulo would truncate
      // the product whenever it does not fit in 32 bits.
      int mul_mod(int a, int b, int m)
      {
          return (int)(((long)a * (long)b) % m);
      }

      // return the inverse of x mod y
      int inv_mod(int x, int y)
      {
          int q, u, v, a, c, t;

          u = x;
          v = y;
          c = 1;
          a = 0;

          q = v / u;
          t = c; c = a - q * c; a = t;
          t = u; u = v - q * u; v = t;
          while (u != 0)
          {
              q = v / u;
              t = c; c = a - q * c; a = t;
              t = u; u = v - q * u; v = t;
          }

          a = a % y;
          if (a < 0) a = y + a;
          return a;
      }

      // return (a^b) mod m
      int powmod(int a, int b, int m)
      {
          int r, aa;

          r = 1;
          aa = a;
          while (1)
          {
              if ((b & 1) != 0) { r = mul_mod(r, aa, m); }
              b = b >> 1;
              if (b == 0) break;
              aa = mul_mod(aa, aa, m);
          }
          return r;
      }

      // return true if n is prime
      bool is_prime(int n)
      {
          int r;

          if ((n % 2) == 0) { return false; }
          r = (int)sqrt((float)n);
          for (int i = 3; i <= r; i += 2)
          {
              if ((n % i) == 0) { return false; }
          }
          return true;
      }

      // return the prime number immediatly after n
      int next_prime(int n)
      {
          n++;
          while (is_prime(n) == false) { n++; }
          return n;
      }

      // fractional part of d, in [0, 1). Same result as d - (int)d for d >= 0,
      // and also well defined for negative d.
      float frac(float d)
      {
          return d - floor(d);
      }

      // start from digit n, return 9 digits
      float CalculatePiDigits(int n)
      {
          int av, vmax, num, den, s, t;
          int nb;
          float sum;
          int a;
          int i;
          int k, v, kq, kq2;

          nb = (int) ((n + 20) * log(10.0) / log(2.0));
          sum = 0;
          a = 3;
          for (a = 3; a <= (2 * nb); a = next_prime(a))
          {
              vmax = (int) ((log((float)(2 * nb)) / log((float)a)));
              av = 1;
              for (i = 0; i < vmax; i++) { av = av * a; }

              s = 0; num = 1; den = 1; v = 0; kq = 1; kq2 = 1;
              for (k = 1; k <= nb; k++)
              {
                  t = k;
                  if (kq >= a)
                  {
                      t = t / a; v--;
                      while ((t % a) == 0) { t = t / a; v--; }
                      kq = 0;
                  }
                  kq++;
                  num = mul_mod(num, t, av);

                  t = 2 * k - 1;
                  if (kq2 >= a)
                  {
                      if (kq2 == a)
                      {
                          t = t / a; v++;
                          while ((t % a) == 0) { t = t / a; v++; }
                      }
                      kq2 = kq2 - a;
                  }
                  den = mul_mod(den, t, av);
                  kq2 = kq2 + 2;

                  if (v > 0)
                  {
                      t = inv_mod(den, av);
                      t = mul_mod(t, num, av);
                      t = mul_mod(t, k, av);
                      for (i = v; i < vmax; i++) { t = mul_mod(t, a, av); }
                      s = s + t;
                      if (s >= av) { s = s - av; }
                  }
              }

              t = powmod(10, n - 1, av);
              s = mul_mod(s, t, av);
              sum = frac(sum + (float) s / (float) av);
          }
          return sum;
      }

      // one work-item per 9-digit group: dval[gid] is the starting digit position
      __kernel void PI(__global int *dval, __global float *sumval)
      {
          int gid = get_global_id(0);
          int d = dval[gid];
          float sum = CalculatePiDigits(d);
          sumval[gid] = sum;
      }
      ";
              #endregion

              /// <summary>
              /// Prompts for a digit count, runs the PI kernel with one work-item per
              /// 9-digit group, and prints the digits and the elapsed time.
              /// </summary>
              /// <param name="args">Command-line arguments (not used).</param>
              static void Main(string[] args)
              {
                  // NOTE(review): Cloo objects are left to process teardown, as in the
                  // original; dispose them explicitly if this code moves into a
                  // longer-lived host (confirm Dispose support in the Cloo version used).
                  ComputeContextPropertyList pList = new ComputeContextPropertyList(platform);
                  ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, pList, null, IntPtr.Zero);

                  Console.WriteLine("Platform Vendor: " + platform.Vendor);
                  Console.WriteLine("Platform Name : " + platform.Name);
                  Console.WriteLine("Device Name : " + context.Devices[0].Name);
                  Console.WriteLine();

                  // Get the input value. Reject anything that is not a positive integer:
                  // non-numeric text would throw, and a negative count would yield a
                  // negative array length further down.
                  Console.Write("Enter an Integer for PI calculation: ");
                  int requested;
                  if (!int.TryParse(Console.ReadLine(), out requested) || requested < 1)
                  {
                      Console.WriteLine("Please enter a positive whole number.");
                      return;
                  }

                  Stopwatch sw = new Stopwatch();
                  sw.Start();

                  // Round the request up to a whole number of 9-digit groups.
                  int groups = (requested - 1) / DigitsPerGroup + 1;
                  int decimals = groups * DigitsPerGroup;
                  Console.WriteLine("Calculating " + (decimals + 1) + " PI digits....");

                  // Starting digit position handled by each work-item: 1, 10, 19, ...
                  int[] dval = new int[groups];
                  for (int i = 0; i < groups; i++)
                  {
                      dval[i] = 1 + (i * DigitsPerGroup);
                  }

                  ComputeBuffer<int> PI_dval = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, dval);
                  ComputeBuffer<float> PI_sumval = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, groups);

                  // Calculate PI
                  ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource });
                  program.Build(null, null, null, IntPtr.Zero);

                  ComputeKernel kernel = program.CreateKernel("PI");
                  kernel.SetMemoryArgument(0, PI_dval);
                  kernel.SetMemoryArgument(1, PI_sumval);

                  ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
                  ComputeEventList events = new ComputeEventList();

                  // The whole computation is a single dispatch whose run time grows with
                  // the digit count. On a GPU that also drives the display, a dispatch
                  // that runs too long can trip the driver's "not responding" recovery;
                  // avoiding that requires splitting the work into shorter dispatches.
                  commands.Execute(kernel, null, new long[] { groups }, null, events);

                  float[] sumval = new float[groups];
                  GCHandle arrCHandle = GCHandle.Alloc(sumval, GCHandleType.Pinned);
                  try
                  {
                      // Non-blocking read; Finish() waits for the kernel and the copy.
                      commands.Read(PI_sumval, false, 0, groups, arrCHandle.AddrOfPinnedObject(), events);
                      commands.Finish();
                  }
                  finally
                  {
                      // Always unpin, even when the read or Finish() throws.
                      arrCHandle.Free();
                  }

                  // Each result is a fraction in [0, 1); its first nine decimals are the
                  // digits of that group.
                  // NOTE(review): the kernel accumulates in single-precision float, so
                  // the low-order digits of each group may be inaccurate - verify the
                  // output against a known reference before relying on it.
                  StringBuilder piDigits = new StringBuilder(decimals);
                  for (int i = 0; i < groups; i++)
                  {
                      string group = ((int)(sumval[i] * 1e9)).ToString("000000000", System.Globalization.CultureInfo.InvariantCulture);
                      piDigits.Append(group, 0, DigitsPerGroup);
                  }

                  sw.Stop();
                  Console.WriteLine("Calculation completed successfully.\nCompute Time: " + sw.Elapsed.ToString());
                  Console.Write("The Value is: 3.");
                  Console.Write(piDigits.ToString());
                  Console.ReadLine();
              }
          }
      }

        • OpenCL code results a 'GPU device not Responding' Error...Any ideas...???
          notyou

           

          Originally posted by: Hiran47 I'm new to OpenCL & doing a research based on this context. I have used CLOO (a C# extension of OpenCL by OpenTK) since it is more flexible to my preference over C#. As a part of my research project, I'm doing a PI calculation in my GPU using OpenCL (merely to show that GPU is a better unit for general purpose calculations). I have an ATI Radeon HD 5770 GPU & Everything goes well for inputs under 1000. But whenever the input is larger than 1000 (say 1500), the display goes blank & results the following error.

           

          http://www.opentk.com/files/inline_images/ErrorMsg.jpg

           

          I tried the same code in an NVIDIA 9800 GPU & the same problem arose for inputs larger than 400. The same code runs perfectly for any input when using only the CPU. If you have an idea about a way to get rid of this issue, please help me.

           

          Thanks in advance.

           

          Here is my Code: (FYI: I found the PI calculation code from the CodeProject)

           

          The problem is not with the code, but with how the driver handles long-running work. The driver is set to time out (VPU Recover) when the GPU does not respond for a certain amount of time. In this case the GPU is unresponsive because it is busy executing your program, and that timeout causes the "crash" you're experiencing.

          There are two things you can do to avoid this:

          1. Disable VPU recover (not recommended, but it may be worth it once you have verified the code works).

          2. Code around it so that after every X iterations it returns to the main program. You can then carry that result forward by passing it back to the GPU. This lets the PC know that the GPU is still responding so it doesn't throw the "stopped responding" message.