cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

Hiran47
Journeyman III

OpenCL code results in a 'GPU device not responding' error... Any ideas?

Please note that I have used CLOO (a C# extension of OpenCL) for the following.

I'm new to OpenCL and am doing research in this area. I have used CLOO (a C# extension of OpenCL by OpenTK) because I prefer working in C#.

As part of my research project, I'm doing a PI calculation on my GPU using OpenCL (merely to show that the GPU is a better unit for general-purpose calculations). I have an ATI Radeon HD 5770 GPU, and everything goes well for inputs under 1000. But whenever the input is larger than 1000 (say 1500), the display goes blank and the following error appears.


http://www.opentk.com/files/inline_images/ErrorMsg.jpg


I tried the same code in an NVIDIA 9800 GPU & the same problem arose for inputs larger than 400.
The same code runs perfectly for any input when using only the CPU.

If you have an idea about a way to get rid of this issue, please help me.

Thanks in advance.

Here is my Code:
(FYI: I found the PI calculation code from the CodeProject)

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Cloo;
using System.Runtime.InteropServices;
using System.Diagnostics;

namespace PI_with_Cloo
{
    /// <summary>
    /// Console program that computes decimal digits of PI on an OpenCL GPU device
    /// through the Cloo bindings. Every work item of the <c>PI</c> kernel produces
    /// one group of 9 digits; the host then formats the groups and prints them.
    /// </summary>
    class Program
    {
        // First OpenCL platform reported by the runtime.
        static ComputePlatform platform = ComputePlatform.Platforms[0];

        #region Kernel Source
        // OpenCL C source. It must keep its line breaks: the "//" comments inside
        // would otherwise comment out the code that follows them on the same line.
        static string kernelSource = @"
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable

// (a * b) mod m. The product is formed in 64 bits and reduced modulo m
// BEFORE it is narrowed back to int, so a large product is never truncated.
int mul_mod(int a, int b, int m)
{
    return (int)((((long)a) * ((long)b)) % m);
}

// Euclid-style iteration on (x, y); the result is normalised into [0, y).
// Presumably the modular inverse of x modulo y - confirm against the
// original CodeProject article.
int inv_mod(int x, int y)
{
    int q, u, v, a, c, t;

    u = x;
    v = y;
    c = 1;
    a = 0;
    do
    {
        q = v / u;

        t = c;
        c = a - q * c;
        a = t;

        t = u;
        u = v - q * u;
        v = t;
    } while (u != 0);

    a = a % y;
    if (a < 0)
        a = y + a;
    return a;
}

// (a ^ b) mod m by repeated squaring.
int powmod(int a, int b, int m)
{
    int r, aa;

    r = 1;
    aa = a;
    while (1)
    {
        if ((b & 1) != 0)
        {
            r = mul_mod(r, aa, m);
        }
        b = b >> 1;
        if (b == 0)
            break;
        aa = mul_mod(aa, aa, m);
    }
    return r;
}

// return true if n is prime
// (every even n, including 2, is reported as not prime; the only caller
// starts searching above 3)
bool is_prime(int n)
{
    int r;

    if ((n % 2) == 0)
    {
        return false;
    }
    r = (int)sqrt((float)n);
    for (int i = 3; i <= r; i += 2)
    {
        if ((n % i) == 0)
        {
            return false;
        }
    }
    return true;
}

// return the prime number immediately after n
int next_prime(int n)
{
    n++;
    while (is_prime(n) == false)
    {
        n++;
    }
    return n;
}

// Fractional part of d for d > 0.
// NOTE(review): for d <= 0 this returns d + floor(d), which is not a
// fractional part for negative d. The only call site below appears to pass
// non-negative values, so the branch is left untouched - confirm.
float frac(float d)
{
    if (d > 0)
    {
        return d - ((int)d);
    }
    else
    {
        return d + floor(d);
    }
}

// start from digit n, return 9 digits
// The value returned is a fraction in [0, 1); the host multiplies it by 1e9.
// NOTE(review): a float mantissa is shorter than 9 decimal digits, so the
// trailing digits of each group are probably unreliable - confirm.
float CalculatePiDigits(int n)
{
    int av, vmax, num, den, s, t;
    int nb;
    float sum;
    int a;
    int i;
    int k, v, kq, kq2;

    nb = (int)((n + 20) * log(10.0) / log(2.0));
    sum = 0;

    for (a = 3; a <= (2 * nb); a = next_prime(a))
    {
        vmax = (int)((log((float)(2 * nb)) / log((float)a)));

        // av = a ^ vmax
        av = 1;
        for (i = 0; i < vmax; i++)
        {
            av = av * a;
        }

        s = 0;
        num = 1;
        den = 1;
        v = 0;
        kq = 1;
        kq2 = 1;

        for (k = 1; k <= nb; k++)
        {
            t = k;
            if (kq >= a)
            {
                t = t / a;
                v--;
                while ((t % a) == 0)
                {
                    t = t / a;
                    v--;
                }
                kq = 0;
            }
            kq++;
            num = mul_mod(num, t, av);

            t = 2 * k - 1;
            if (kq2 >= a)
            {
                if (kq2 == a)
                {
                    t = t / a;
                    v++;
                    while ((t % a) == 0)
                    {
                        t = t / a;
                        v++;
                    }
                }
                kq2 = kq2 - a;
            }
            den = mul_mod(den, t, av);
            kq2 = kq2 + 2;

            if (v > 0)
            {
                t = inv_mod(den, av);
                t = mul_mod(t, num, av);
                t = mul_mod(t, k, av);
                for (i = v; i < vmax; i++)
                {
                    t = mul_mod(t, a, av);
                }
                s = s + t;
                if (s >= av)
                {
                    s = s - av;
                }
            }
        }

        t = powmod(10, n - 1, av);
        s = mul_mod(s, t, av);
        sum = frac(sum + (float)s / (float)av);
    }
    return sum;
}

// One work item per 9-digit group: reads its start digit from dval and
// writes the resulting fraction to sumval at the same index.
__kernel void PI(__global int *dval, __global float *sumval)
{
    int gid = get_global_id(0);
    int d = dval[gid];
    float sum = CalculatePiDigits(d);
    sumval[gid] = sum;
}
";
        #endregion

        /// <summary>
        /// Prompts for a digit count, runs the <c>PI</c> kernel with one work item
        /// per 9-digit group, and prints the digits together with the elapsed time.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            ComputeContextPropertyList pList = new ComputeContextPropertyList(platform);
            ComputeContext context = new ComputeContext(ComputeDeviceTypes.Gpu, pList, null, IntPtr.Zero);

            Console.WriteLine("Platform Vendor: " + platform.Vendor);
            Console.WriteLine("Platform Name : " + platform.Name);
            Console.WriteLine("Device Name : " + context.Devices[0].Name);
            Console.WriteLine();

            // Get the Input Value...
            Console.Write("Enter an Integer for PI calculation: ");
            int digits;
            if (!int.TryParse(Console.ReadLine(), out digits) || digits < 1)
            {
                // Reject empty, non-numeric and non-positive input instead of
                // failing later with a parse or array-size exception.
                Console.WriteLine("Please enter a positive integer.");
                return;
            }

            Stopwatch sw = new Stopwatch();
            sw.Start();

            // From here on 'digits' is the number of 9-digit groups (work items).
            digits = (digits - 1) / 9;
            digits++;
            Console.WriteLine("Calculating " + (digits * 9 + 1) + " PI digits....");

            char[] pival = new char[digits * 9 + 1];
            pival[digits * 9] = '0';

            // Start position of every group: 1, 10, 19, ...
            int[] dval = new int[digits];
            for (int i = 0; i < digits; i++)
            {
                dval[i] = 1 + (i * 9);
            }

            ComputeBuffer<int> PI_dval = new ComputeBuffer<int>(context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, dval);
            ComputeBuffer<float> PI_sumval = new ComputeBuffer<float>(context, ComputeMemoryFlags.WriteOnly, digits);

            // Calculate PI
            ComputeProgram program = new ComputeProgram(context, new string[] { kernelSource });
            program.Build(null, null, null, IntPtr.Zero);

            ComputeKernel kernel = program.CreateKernel("PI");
            kernel.SetMemoryArgument(0, PI_dval);
            kernel.SetMemoryArgument(1, PI_sumval);

            ComputeCommandQueue commands = new ComputeCommandQueue(context, context.Devices[0], ComputeCommandQueueFlags.None);
            ComputeEventList events = new ComputeEventList();

            // All groups are submitted as a single launch. On a GPU that also
            // drives the display, a launch that runs for too long can be reset
            // by the driver/OS watchdog ("device not responding").
            commands.Execute(kernel, null, new long[] { digits }, null, events);

            float[] sumval = new float[digits];
            GCHandle arrCHandle = GCHandle.Alloc(sumval, GCHandleType.Pinned);
            try
            {
                // Non-blocking read; Finish() waits for the kernel and the copy.
                commands.Read(PI_sumval, false, 0, digits, arrCHandle.AddrOfPinnedObject(), events);
                commands.Finish();
            }
            finally
            {
                // Always unpin the array, even when the read or finish throws.
                arrCHandle.Free();
            }

            // Turn every fraction into exactly 9 characters and place them at
            // the group's offset in the output.
            for (int i = 0; i < digits; i++)
            {
                int d = 1 + i * 9;
                string str = string.Format("{0:000000000}", (int)(sumval[i] * 1e9));
                char[] s = str.ToCharArray(0, 9);
                for (int j = 0; j < 9; j++)
                {
                    pival[d - 1 + j] = s[j];
                }
            }

            sw.Stop();
            Console.WriteLine("Calculation completed successfully.\nCompute Time: " + sw.Elapsed.ToString());

            Console.Write("The Value is: 3.");
            for (int i = 0; i < pival.Length; i++)
            {
                Console.Write(pival[i].ToString());
            }
            Console.ReadLine();
        }
    }
}

0 Likes
7 Replies
notyou
Adept III

Originally posted by: Hiran47 I'm new to OpenCL & doing a research based on this context. I have used CLOO (a C# extension of OpenCL by OpenTK) since it is more flexible to my preference over C#. As a part of my research project, I'm doing a PI calculation in my GPU using OpenCL (merely to show that GPU is a better unit for general purpose calculations). I have an ATI Radeon HD 5770 GPU & Everything goes well for inputs under 1000. But whenever the input is larger than 1000 (say 1500), the display goes blank & results the following error.

 

http://www.opentk.com/files/inline_images/ErrorMsg.jpg

 

I tried the same code in an NVIDIA 9800 GPU & the same problem arose for inputs larger than 400. The same code runs perfectly for any input when using only the CPU. If you have an idea about a way to get rid of this issue, please help me.

 

Thanks in advance.

 

Here is my Code: (FYI: I found the PI calculation code from the CodeProject)

 

The problem is not with the code, but rather how the driver handles input. Currently, the driver is set to time out (VPU recover) after a certain amount of time (when it does not respond, in this case, because you are executing your program), causing the "crash" you're experiencing.

There are two things you can do to avoid this:

1. Disable VPU recover (not recommended, but it may be worth it once you have verified the code works).

2. Code around it so that after every X iterations it returns to the main program. You can then carry that result forward by passing it back to the GPU. This lets the PC know that the GPU is still responding so it doesn't throw the "stopped responding" message.

0 Likes

Originally posted by: notyou The problem is not with the code, but rather how the driver handles input. Currently, the driver is set to time out (VPU recover) after a certain amount of time (when it does not respond, in this case, because you are executing your program), causing the "crash" you're experiencing.


If you're using Vista or Windows 7 then it's not VPU recover that's triggering, but TDR (timeout detection and recovery).  This is a feature of the OS.  If a command packet takes more than 2 seconds (default timeout) then the OS attempts to reset the device, thinking it has locked up.

You can change TDR behavior in the registry.  See http://www.microsoft.com/whdc/device/display/wddm_timeout.mspx for more information.

Jeff

0 Likes

@notyou & jeff_golds: Thanx a lot for your quick replies guys. Now I know the reason for the above error.

But, is there a way to disable TDR...? I searched the registry for the keys mentioned in the link provided by jeff but there were no such keys.

0 Likes

Originally posted by: Hiran47 @notyou & jeff_golds: Thanx a lot for your quick replies guys. Now I know the reason for the above error.

 

But, is there a way to disable TDR...? I searched the registry for the keys mentioned in the link provided by jeff but there were no such keys.

 

Hiran47,

          It is not recommended to disable VPU or TDR.  It is good to reduce kernel execution time instead of disabling VPU or TDR.

0 Likes

Hiran47,

         It is not recommended to disable VPU or TDR.  It is good to reduce kernel execution time instead of disabling VPU or TDR.

genaganna,

         hmmm....true, but at the moment, I'm fed up with trying to reduce the kernel execution time....

anyway, thanx for the advice...

0 Likes

Originally posted by: Hiran47 @notyou & jeff_golds: Thanx a lot for your quick replies guys. Now I know the reason for the above error.

 

But, is there a way to disable TDR...? I searched the registry for the keys mentioned in the link provided by jeff but there were no such keys.

 

The keys are not present by default, you have to add them in the appropriate place.

Jeff

0 Likes

The keys are not present by default, you have to add them in the appropriate place.

Jeff

 

        i c....will try that....thanx again

0 Likes