5 Replies Latest reply on Oct 12, 2009 10:33 AM by southgary

    problem on built the kernel

    southgary
      The host code is added

      Hello!

      Thank you for answer me in the last question. I have tried to design a iteration program but the following error is found on bulit the kernel.

      Picture: http://i1015.photobucket.com/albums/af280/southgary/bug.jpg

      Anyone know what is that?

       

      __kernel void iter(__global float *jdiag, __global int *col_ind, __global int *jd_ptr, __global float *B, __global float *xnew) { int col = 0; int i = 0; int j = 0; int N = 4; float xold[4] = {0}; int detect_limit = 0; float temp = 0; float tempnew = 0; float tempold = 0; float limit = 0.1; int ii; int jdiag_ptr = 0; int Ndata = 0; //Start iterative for (ii=0;ii<10;ii++){ detect_limit=0; for (i=0;i<N;i++){ Ndata = jd_ptr[i+1] - jd_ptr[i]; //find the number of valua jdiag_ptr = jd_ptr[i]; //store the start pointer for(j=0;j<Ndata;j++){ //start calculate col_ind[jdiag_ptr+j] = col; if(col != i){ temp += jdiag[jdiag_ptr+j]*xold[col]; } } xnew[i] = (B[i]-temp)/jdiag[jdiag_ptr]; } for (i=0;i<N; i++){ tempnew = xnew[i]; tempold = xold[i]; if(xnew[i] < 0){ tempnew = xnew[i]*(-1); } if(xold[i] < 0){ tempold = xold[i]*(-1); //printf("tempold %3.2f\n", tempold); } temp = tempnew-tempold; if(temp < 0){ temp = temp*(-1); } if (temp > limit){ xold[i]=xnew[i]; detect_limit++; } } } }

        • problem on built the kernel
          cgorac

          As for the first warning: you should put "float limit = 0.1f;" there.  As for errors, looks like you have some kind of superfluous chars there - when I copy the code you supplied, and paste it in a text file on my (Linux) machine, clc is able to compile it without any kind of problem.

          • problem on built the kernel
            omkaranathan

            The error log points to error in line 73, where as your code is 61 lines.(Assuming you have directly copy pasted the kernel code here). Are you sure that there are no stray characters or so in your .cl file?

            If that is not the case, could you post the host side code too so that I can try and reproduce the problem?

              • problem on built the kernel
                southgary

                Thank you cgorac and omkaranathan

                I have tried to re-copy the posted code to my program and revise the float 0.1 to 0.1f. The same error is now move to line 60. I will post the host code later when I go back to my work stations

                =======================================================

                Attached please see the host code. I am using Windows Vista SP1 and VC++ 2008.

                 

                #include <CL/cl.hpp> #include <stdio.h> #include <stdlib.h> #include <time.h> #include <sys/stat.h> #include <windows.h> #include <SDKUtil/SDKFile.hpp> #define N 4 //--------------Define the structure of complex number------------------------- /*typedef struct { float real; float imag; } complex; complex mycomplex(float a,float b){ complex c; c.real = a; c.imag = b; return c; } complex add(complex c1,complex c2){ complex c; c.real=c1.real + c2.real; c.imag=c1.imag + c2.imag; return c; } complex subtract(complex c1,complex c2){ complex c; c.real=c1.real-c2.real; c.imag=c1.imag-c2.imag; return c; } complex multiply(complex c1,complex c2){ complex c; c.real=((c1.real*c2.real)-(c1.imag*c2.imag)); c.imag=((c1.imag*c2.real)+(c1.real*c2.imag)); return c; } complex divide(complex c1,complex c2){ complex c; c.real=((c1.real*c2.real)+(c1.imag*c2.imag))/((c2.real*c2.real)+(c2.imag*c2.imag)); c.imag=((c1.imag*c2.real)-(c1.real*c2.imag))/((c2.real*c2.real)+(c2.imag*c2.imag)); return c; }*/ char * load_program_source(const char *filename) { struct stat statbuf; FILE *fh; char *source; fh = fopen(filename, "r"); if (fh == 0) return 0; stat(filename, &statbuf); source = (char *) malloc(statbuf.st_size + 1); fread(source, statbuf.st_size, 1, fh); source[statbuf.st_size] = '\0'; return source; } //-------------------------------Run OpenCL----------------------------------------- int runCL (float *jdiag, int *col_ind, int *jd_ptr, float *b, float *results, int row) { cl_context mycontext; cl_command_queue cmd_queue; cl_int err; size_t returned_size = 0; size_t buffer_size = sizeof(float) * row; cl_device_id devices; cl_device_id cpu; cl_char vendor_name[1024] = {0}; cl_char device_name[1024] = {0}; cl_uint max_compute_units[1] = {0}; cl_program program; cl_kernel kernel; cl_mem jdiag_mem, col_ind_mem, jd_ptr_mem, B_mem, Xnew_mem; DWORD start2,end2; //Find the CPU OpenCL devices that could be used err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_CPU, 1, &cpu, NULL); if (err != CL_SUCCESS) { printf("\nFail to create CPU devices group!\n"); system("pause"); return EXIT_FAILURE; } else if (err == CL_SUCCESS) { printf("\nSuccess to create CPU devices group!\n"); } // Find the GPU CL device, this is what we really want // If there is no GPU device is CL capable, fall back to CPU err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &devices, NULL); if (err != CL_SUCCESS) { printf("\nFail to create GPU devices group!\nReturn to CPU devices group\n"); devices = cpu; } else if (err == CL_SUCCESS) { printf("\nSuccess to create GPU devices group!\n"); } // Get some information about the returned device err = clGetDeviceInfo(devices, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size); err |= clGetDeviceInfo(devices, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size); err |= clGetDeviceInfo(devices, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(max_compute_units), max_compute_units, &returned_size); if (err == CL_SUCCESS) { printf("Connecting to %s\n%s\n", vendor_name, device_name); printf("Maximum compute units are used: %d\n", max_compute_units); } //Create the context printf("Creating the context: "); mycontext = clCreateContextFromType(NULL, CL_DEVICE_TYPE_CPU, NULL, NULL, &err); if (!mycontext) { printf("Fail!\n"); } else if (mycontext) { printf("Success!\n"); } // Create the command queue for the context printf("Creating the command queue: "); cmd_queue = clCreateCommandQueue(mycontext, devices, 0, NULL); if (!cmd_queue) { printf("Fail!\n"); } else if (cmd_queue) { printf("Success!\n"); } // Allocate memory on the device to hold our data and store the results into // Input array jdiag jdiag_mem = clCreateBuffer(mycontext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, buffer_size, (void*)jdiag, NULL); //Input array col_ind col_ind_mem = clCreateBuffer(mycontext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, buffer_size, (void*)col_ind, NULL); //Input array jd_ptr jd_ptr_mem = clCreateBuffer(mycontext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, buffer_size, (void*)jd_ptr, NULL); // Input array B B_mem = clCreateBuffer(mycontext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, buffer_size, (void*)b, NULL); // Input array X Xnew_mem = clCreateBuffer(mycontext, CL_MEM_READ_WRITE, buffer_size, NULL, NULL); // Get all of the stuff written and allocated clFinish(cmd_queue); //Create and build the program const char * filename = "LUdecomp.cl"; char *program_source = load_program_source(filename); program = clCreateProgramWithSource(mycontext, 1, (const char**)&program_source, NULL, &err); if (err != CL_SUCCESS) { printf("\nFail to create the program\n"); return EXIT_FAILURE; } else if (err == CL_SUCCESS) { printf("\nSuccess to create the program\n"); } err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (err != CL_SUCCESS) { printf("\nFail to build the program\n"); size_t len; char buffer[2048]; clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len); printf("%s\n", buffer); //clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_STATUS, sizeof(buffer), buffer, &len); //printf("%s\n", buffer); //clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_OPTIONS, sizeof(buffer), buffer, &len); //printf("%s\n", buffer); return EXIT_FAILURE; } else if (err == CL_SUCCESS) { printf("\nSuccess to build the program\n"); } //Create the Kernal kernel = clCreateKernel(program, "iter", &err); // Now setup the arguments to our kernel err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &jdiag_mem); err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &col_ind_mem); err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &jd_ptr_mem); err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &B_mem); err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &Xnew_mem); //if (err != CL_SUCCESS) //{ // printf("\nFail to Set Kernal Arg value! Reason:"); //return EXIT_FAILURE; //} if (err == CL_SUCCESS) { printf("\nSuccess to Set Kernal Arg value!\n"); } else if (err == CL_INVALID_KERNEL) { printf("\nError: CL_INVALID_KERNEL\n"); return EXIT_FAILURE; } else if (err == CL_INVALID_ARG_VALUE) { printf("\nError: CL_INVALID_ARG_VALUE\n"); return EXIT_FAILURE; } else if (err == CL_INVALID_MEM_OBJECT) { printf("\nError: CL_INVALID_MEM_OBJECT\n"); return EXIT_FAILURE; } else if (err == CL_INVALID_SAMPLER) { printf("\nError: CL_INVALID_SAMPLER\n"); return EXIT_FAILURE; } else if (err == CL_INVALID_ARG_SIZE) { printf("\nError: CL_INVALID_ARG_SIZE\n"); return EXIT_FAILURE; } else if (err == CL_INVALID_ARG_INDEX) { printf("\nError: CL_INVALID_ARG_INDEX\n"); return EXIT_FAILURE; } start2=GetTickCount(); // Run the calculation by enqueuing it and forcing the // command queue to complete the task size_t global_work_size = row; err = clEnqueueNDRangeKernel(cmd_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("\nFail to Enqueue the NDRange!\n"); return EXIT_FAILURE; } else if (err == CL_SUCCESS) { printf("\nSuccess to Enqueue the NDRange!\n"); } clFinish(cmd_queue); end2=GetTickCount(); printf("\nThe times that kernel has taken to execute: %ldms\n\n", end2-start2); // Once finished read back the results from the answer // array into the results array err = clEnqueueReadBuffer(cmd_queue, Xnew_mem, CL_TRUE, 0, buffer_size, results, 0, NULL, NULL); if (err != CL_SUCCESS) { printf("\nFail to read the result from kernel!\n"); return EXIT_FAILURE; } else if (err == CL_SUCCESS) { printf("\nSuccess to read the result from kernel!\n"); } clFinish(cmd_queue); //Rekease everything that is used clReleaseMemObject(jdiag_mem); clReleaseMemObject(col_ind_mem); clReleaseMemObject(jd_ptr_mem); clReleaseMemObject(B_mem); clReleaseMemObject(Xnew_mem); clReleaseCommandQueue(cmd_queue); clReleaseContext(mycontext); return CL_SUCCESS; } int main (){ //Define the varible int row = N; float **A; float *b; float *results; int i = 0; int j = 0; int k = 0; int ptr = 0; float *jdiag; int *col_ind; int *jd_ptr; DWORD start1,end1; //Assign the memory of matrix a A=(float **)malloc(sizeof(float*)*N); for(i=0;i<N;i++) { A[i]=(float*)malloc(sizeof(float)*N); } jdiag=(float *)malloc(sizeof(float)*(N*N)); col_ind=(int *)malloc(sizeof(int)*(N*N)); jd_ptr=(int *)malloc(sizeof(int)*(N+1)); b=(float *)malloc(sizeof(float)*N); results=(float *)malloc(sizeof(float)*N); //Read the matrix from file FILE *in_A=fopen("C:\\matrix_A.txt","r"); for(i=0;i<N;i++){ for(j=0;j<N;j++){ fscanf(in_A,"%f",&A[i][j]); } } FILE *in_B=fopen("C:\\matrix_B.txt","r"); for(i=0;i<N;i++){ fscanf(in_B,"%f",&b[i]); } //initial the array for(i=0;i<(N*N);i++){ jdiag[i] = 0; col_ind[i] = 0; } for(i=0;i<(N+1);i++){ jd_ptr[i] = 0; } //Start to rearrange the matrix for(i=0;i<N;i++) { for(j=0;j<N;j++) { if (A[i][j] != 0) { jdiag[k] = A[i][j]; col_ind[k] = j; k++; } } ptr++; jd_ptr[ptr] = k; } //Start to count the times start1=GetTickCount(); runCL(jdiag, col_ind, jd_ptr, b, results, row); //Print out the result printf("\nThe matrix X is:\n"); for(i=0;i<N;i++){ // for(j=0;j<row;j++){ printf("%6.2f", results[i]); //} printf("\n"); } fclose(in_A); fclose(in_B); //End to count the times end1=GetTickCount(); printf("\nThe times that system has taken to execute: %ldms\n\n", end1-start1); return 0; }