Answered (Assumed Answered)

Problem writing to write buffer from within kernel

Question asked by ankhster on Sep 24, 2012
Latest reply on Oct 6, 2012 by ankhster

Hi

 

I'm fairly new to OpenCL and I'm having a great deal of trouble trying to copy data (six int8 vectors) from the device to the host. I've set up everything that I understand to be relevant, as shown below. This is running with the following setup:

Windows 7 x64

Visual Studio 2008

AMD Catalyst 12.8

SDK 2.7.923.1

Tahiti 7970.

 

Any help would be very much appreciated.

 

Host:

 

   cl_mem    inDevice, outDevice;

   cl_int    err;

   size_t    global;

 

   global = 1;

   vecPart = 0;

 

   inDevice = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(cl_int), NULL, NULL);

   outDevice = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_int8) * global * 6, NULL, NULL);

   .

   err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &inDevice);

   err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &outDevice);

   .

   err = clEnqueueWriteBuffer(commands, inDevice, CL_TRUE, 0, sizeof(int), &vecPart, 0, NULL, NULL);

   .

   err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);

   .

 

Device:

 

__kernel void myproc(__global int *position, __global int *outBuffer)

{

    int8    tallyL = 0;

    int8    tallyLO = 0;

    int8    tallyLOS = 0;

    int8    tallyLM = 0;

    int8    tallyLMO = 0;

    int8    tallyLMOS = 0;

    .

    .

    .

 

    id = get_global_id(0);

    id *= 6;

    vstore8(tallyL, id, outBuffer);        // Device stops responding but recovers

    vstore8(tallyLO, id + 1, outBuffer);

    vstore8(tallyLOS, id + 2, outBuffer);

    vstore8(tallyLM, id + 3, outBuffer);

    vstore8(tallyLMO, id + 4, outBuffer);

    vstore8(tallyLMOS, id + 5, outBuffer);

}

 

 

I then commented out the vstore calls and the kernel completed without any problems, although of course I didn't get any results.

    .

    .

    .

    id = get_global_id(0);

    id *= 6;

//    vstore8(tallyL, id, outBuffer);        // Uncomment and device stops responding but recovers

//    vstore8(tallyLO, id + 1, outBuffer);

//    vstore8(tallyLOS, id + 2, outBuffer);

//    vstore8(tallyLM, id + 3, outBuffer);

//    vstore8(tallyLMO, id + 4, outBuffer);

//    vstore8(tallyLMOS, id + 5, outBuffer);

}

 

 

I then replaced the vstore calls with individual array writes, as shown below.

    .

    .

    .

    id = get_global_id(0);

    id *= 6;

    id *= sizeof(int8);        // Everything below commented and completion is in 6 milli-seconds

//    outBuffer[id] = tallyL.s0;    // Uncommented increases to 7.5 seconds

//    outBuffer[id + 1] = tallyL.s1;    // Uncommented with above line increases to 8.5 seconds

//    outBuffer[id + 2] = tallyL.s2;    // Uncommented with above 2 lines increases to 11.5 seconds

//    outBuffer[id + 3] = tallyL.s3;    // Uncommented with above 3 lines - device stops responding but recovers

Outcomes