3 Replies Latest reply on Sep 6, 2012 2:56 AM by notzed

    Weird behavior of barrier()

    t-man
      "__kernel void square(                                                   \n" \
      "   __global int* nbr,                                          \n" \
      "   __global double* BC,                                         \n" \
      "   const int ne,                                       \n" \
      "   const int nv, \n" \
      "   __global int* firstnbr, \n" \
      "   __global double* sigma,\n" \
      "   __global int* queue, \n" \
      "   __global int* queue2, \n" \
      "   __global int* nr_queue, \n" \
      "   __global int* nr_queue2, \n" \
      "   __global int* stack, \n" \
      "   __global int* nr_stack, \n" \
      "   __global int* succ, \n" \
      "   __global int* level, \n" \
      "   __global int* nr_level, \n" \
      "   __global int *succ_index, \n" \
      "   __global int* found, \n" \
      "   __global double* delta, \n" \
      "  __global int* d, \n" \
      "   __global int* sem)\n" \
      "{                                                                  \n" \
      "   int i = get_local_id(0);                                      \n" \
      "int glob = get_global_id(0); \n" \
      "   int j = get_num_groups(0);\n" \
      " __local int k; k = get_group_id(0); \n" \
      "  __local int size; size = get_local_size(0); \n"\
      "   sem[0] = 0; \n" \
      " \n" \
      "   *found = 1; \n" \
         " __local int nr_roots, count,rest,root,nr_neigh; \n " \
         " int neigh_per_thread,h,node,dw,f,gh,temp,nr=0; \n " \
         " queue[0] = 0; \n" \
         " //barrier(CLK_LOCAL_MEM_FENCE); \n" \
         " while ( *found != 0){ \n" \
          "    *found = 0;\n" \
          "  \n" \
           "   //barrier(CLK_LOCAL_MEM_FENCE); \n " \
           "BC[0] = 1; BC[1] = *nr_level%2;\n" \
         "     if(*nr_level%2 == 0) \n" \
          "    {      \n" \
          "            nr_roots = (*nr_queue+1)/j; count = 0; rest = (*nr_queue+1)%j; \n" \
           "           if(k<rest  && i==0) \n" \
            "                  nr_roots=nr_roots + 1; \n" \
             "         *nr_queue2 = 0; if(k==0&&i==0) BC[5] = nr_roots;\n" \
              "        barrier(CLK_LOCAL_MEM_FENCE);   if(k==0&&i==0) BC[6] = nr_roots; \n" \

       

      }

      }

      So what happens is that I need each workgroup to take an equal number of elements from a queue that has nr_queue elements. In the first step it has 1 element, so only group 0 will get 1 element. Thus "nr_roots" becomes 1 and I put it in BC[5]. Then I do a barrier to make sure that all the work-items in the workgroup know that nr_roots is 1, but when I write nr_roots to BC[6] the result is 0. Does anyone have any idea why this might be? Thanks!
        • Re: Weird behavior of barrier()
          binying

          Maybe you can enable this,

          #pragma OPENCL EXTENSION cl_amd_printf : enable

          so that you can print nr_roots at those lines that confuse you. Well, this

          may not be the smartest way, but it works.

           

           

          -----------------

          #pragma OPENCL EXTENSION cl_amd_printf : enable

           

           

           

           

           

           

          /*
           * async_copy: example kernel showing how to print values from inside a
           * kernel with printf. Each work-item computes its 2D position and stores
           * its linear index into `out`; only work-item (0,0) prints.
           *
           * in    - global input buffer; not referenced in the visible portion
           * out   - global output buffer; out[gidx] receives gidx
           * sdata - local scratch buffer; not referenced in the visible portion
           *
           * The tail of the body is elided in the post (the "..." line), so this
           * describes only the statements shown. printf in a kernel needs printf
           * support; the post enables it with the cl_amd_printf pragma.
           */
          __kernel void async_copy (__global int* in, __global int* out, __local int * sdata)

          {

                    // CUDA-style names for the OpenCL work-item queries below.
                    int2 threadIdx;

                    int2 blockIdx;

                    int2 blockDim;

                    // Position of this work-item inside its work-group.
                    threadIdx.x = (int)get_local_id(0);

                    threadIdx.y = (int)get_local_id(1);

                    // Index of the work-group this work-item belongs to.
                    blockIdx.x = (int)get_group_id(0);

                    blockIdx.y = (int)get_group_id(1);

                    // Work-group size in each dimension.
                    blockDim.x = (int)get_local_size(0);

                    blockDim.y = (int)get_local_size(1);

                    // Total number of work-items in each dimension.
                    int xDim = (int)get_global_size(0);

                    int yDim = (int)get_global_size(1);

           

           

                    // Global position rebuilt by hand from group index, group size and
                    // local index.
                    // NOTE(review): matches get_global_id() only when the kernel is
                    // enqueued without a global work offset — confirm against the host code.
                    int2 idx;

                    idx.x = blockDim.x*blockIdx.x + threadIdx.x;

                    idx.y = blockDim.y*blockIdx.y + threadIdx.y;

           

           

                    // Global position as reported directly by the runtime.
                    int xPos = (int)get_global_id(0);

                    int yPos = (int)get_global_id(1);

           

                    // Print from a single work-item only, so the output is one line
                    // rather than one line per work-item.
                    if((xPos == 0) && (yPos==0))

                              printf("%d.%d %d.%d\n", xPos, yPos, idx.x, idx.y);

           

           

                    // Linear index: y position times the global width (dimension 0)
                    // plus x position.
                    int gidx = yPos*xDim + xPos;

                    // Each work-item writes its own linear index to its own slot.
                    out[gidx] = gidx;

          ...

          }

          • Re: Weird behavior of barrier()
            t-man

            The problem has to do with the fact that the variable needs to be declared with the "volatile" qualifier, so that threads read it from memory and not from registers (at least that's what I think). Adding volatile seems to solve it.

            • Re: Weird behavior of barrier()
              notzed

              Might have something to do with the fact that BC is a global, but you're using a local barrier.

               

              Nor can barriers be conditional.

               

              Not to mention that every thread is writing to the same addresses.