
t-man
Adept II

While loop executed twice by a work-group

Well, the problem I have is the following:

I have a kernel that computes the betweenness centrality of a graph in parallel. What happens is very strange: on average once every 7-8 runs, the second while loop ( while(count < nr_roots) ) gets executed twice by one of the work-groups, even though count is incremented in the first iteration. In my case the graph has 12 vertices, nr_roots is 1 and count is 0; the while body runs, count is incremented to 1, and yet count is still seen as 0 one more time.

Remember, this happens only once every 6-7 runs, not always. Does anyone have any idea why? I also tried making count a __local variable (shared by the work-group) and a __private one (private to each work-item), with no success. Any tips or suggestions are more than welcome! Here is the relevant part of the kernel (a stripped-down sketch of the failing loop follows it):

" while ( found_local != 0){ \n" \

        "                                                                       \n" \

        "                                                                                     \n" \

         "                                                                                      \n " \

         "   if(i==0) {    level_local = atomic_add(nr_level,0); atomic_xchg(found,0);\n" \

         "                 pozition_local = atomic_add(pozition,0);\n " \

        "                  nr_roots = atomic_add(&level[level_local],0)/j; atomic_xchg(&count,0); nr=0; rest = atomic_add(&level[level_local],0)%j;  \n" \

         "                 if(k<rest) nr_roots = nr_roots + 1;}                                                 \n" \

          "                                                                 \n" \

           "                                                                \n" \

            "            barrier(CLK_GLOBAL_MEM_FENCE); \n   " \

                        "                              \n" \

             "           while(count < nr_roots ){   \n" \

              "           \n" \

                "              if(i==0){    \n" \

               "                 root = stack[pozition_local + count*j + k];\n" \

                "             succ_index[root] = 0; \n" \

                 "              nr_neigh = firstnbr[root+1] - firstnbr[root]; } \n" \

                  "           barrier(CLK_LOCAL_MEM_FENCE);\n" \

                   "         \n" \

                    "            neigh_per_thread = nr_neigh/size; \n" \

                     "       if(i<nr_neigh%size) \n" \

                      "          neigh_per_thread ++; \n" \

                       "     h = 0;  \n" \

                        "    while(h<neigh_per_thread)\n" \

                        "        {\n" \

                         "       node = nbr[firstnbr[root] + size*h + i];\n" \

                          "       \n" \

                           "     dw = atomic_cmpxchg(&d[node], -1, level_local + 1);\n" \

                            "    \n" \

                             "   if(dw == -1)\n" \

                              "          {\n" \

                                "         atomic_inc(&level[level_local + 1]);\n" \

                                  "       atomic_cmpxchg(found,0,1);\n" \

                                   "      dw = level_local + 1;\n" \

                                    "     gh = atomic_inc(nr_stack);\n" \

                                     "    stack[gh] = node;\n" \

                                     " \n" \

                                      "  }\n" \

                                "if(dw == level_local + 1)\n" \

                                 " {                                              \n" \

                                  "                                             \n" \

                                   "       temp = atomic_inc(&succ_index[root]);\n" \

                                   "      succ[firstnbr[root] + temp] = node;\n" \

                                   " GetSemaphor2(&sem[0]);     temporal = atomic_xchg(&sigma[node],0); temporal2=atomic_xchg(&sigma[root],sigma[root]);                                          \n" \

                                    "     atomic_xchg(&sigma[node],temporal+temporal2);ReleaseSemaphor2(&sem[0]);     \n" \

                                     "   }                              \n" \

                                "h++;                                   \n" \

                                "}                                      \n" \

                            "                                          \n" \

                       "if(glob%6==1) {atomic_add(&count,1);if(root==4&&nr1==1) BC[8] = 1;} \n" \

    "                   barrier(CLK_GLOBAL_MEM_FENCE); }  \n" \

                       " \n" \

               " barrier(CLK_LOCAL_MEM_FENCE);\n"

                "if(glob==0) {f= atomic_add(&level[level_local],0); atomic_add(pozition,f); atomic_add(nr_level,1); \n" \

                " }                                                     \n" \

                "                                                       \n" \

                " if(i==0) \n" \

                "       { atomic_add(global_sync,1); \n" \

                "        if ( k==0) { while(atomic_add(global_sync,0)< j); atomic_xchg(global_sync, 0); } \n" \

                "        else { while(atomic_add(global_sync,0) > 0); }} \n" \

                "barrier(CLK_LOCAL_MEM_FENCE);if(i==0) found_local = atomic_add(found,0);barrier(CLK_LOCAL_MEM_FENCE);\n" \

       "}  if(glob==11) BC[glob] = atomic_xchg(&sigma[11],sigma[11]); } \n";

1 Solution
t-man
Adept II

I managed to localize the problem. It turned out that one work-group was going through the iteration much faster than the other, incrementing the global variable nr_level and making the other work-groups see a wrong value. Thank you yurtesen for all your help!
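In other words, the failure is an interleaving like the one below. This is a minimal host-side pthreads sketch of the race, not the kernel itself: only the variable name nr_level is taken from the kernel, and the thread names and timing are artificial.

/* Two-thread simulation of the race: a "fast" work-group publishes the
 * next level number before a "slow" one has read the current one. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int nr_level;

static void *fast_group(void *arg)
{
    (void)arg;
    int level_local = atomic_load(&nr_level);  /* snapshots level 0       */
    (void)level_local;                         /* ...finishes its roots
                                                  almost immediately...   */
    atomic_fetch_add(&nr_level, 1);            /* publishes level 1 early */
    return NULL;
}

static void *slow_group(void *arg)
{
    (void)arg;
    usleep(1000);                              /* still busy with level 0 */
    int level_local = atomic_load(&nr_level);  /* reads 1 instead of 0    */
    printf("slow group now indexes level[%d]: wrong nr_roots, extra pass\n",
           level_local);
    return NULL;
}

int main(void)
{
    pthread_t a, b;
    pthread_create(&a, NULL, fast_group, NULL);
    pthread_create(&b, NULL, slow_group, NULL);
    pthread_join(a, NULL);
    pthread_join(b, NULL);
    return 0;
}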
