1 Reply Latest reply on Jul 3, 2013 8:58 PM by paciorek

    call to ACML linear algebra routine causes subsequent threaded code within a fork to hang

    paciorek

      Hi, When I call some ACML linear algebra routines (in particular DGESDD, but also DGESV (I believe) and possibly others), they cause subsequent threaded code within a fork to hang when OMP_NUM_THREADS is greater than 1. If I just call the threaded code within a fork, without having the initial ACML call I don't see the problem. We're running Ubuntu 12.04 with ACML 5.2, in particular gfortran64_fma4_mp on AMD Opteron(TM) Processor 6272. We see the same problem with gfortran64_mp on Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz. Here's some example C++ (really mostly just C) code. The first fork with embedded threaded code works fine, but then after calling DGESDD the subsequent identical fork with embedded threaded code fails.

       

      
      

      #include <unistd.h>    
      #include <sys/types.h> 
      #include <errno.h>     
      #include <stdio.h>     
      #include <sys/wait.h>  
      #include <stdlib.h>    
      #include <omp.h>

       

      // compile with:  g++ testThrSVD.cpp  -o testThrSVD -L/usr/local/lib -fopenmp  -llapack -lblas
      extern "C" int dgesdd_(char* jobz, int* m, int* n, double* A, int* lda, double* S, double* U, int* ldu, double* VT, int* LDVT, double* work, int* lwork, int* iwork, int* info);

       

      int main()
      {
         pid_t childpid; /* variable to store the child's pid */
          int retval;     /* child process: user-provided return code */
          int status;     /* parent process: child's exit status */

       

          int nThreads, myID;
          double* input;

       

          // set-up stuff for the linear algebra
          int size = 3;
          char jobz = 'A';
          double* x = new double[size*size];
          double* U = new double[size*size];
          double* VT = new double[size*size];
          double* S = new double[size];
          int lwork = 3*size + 4*size*size+4*size;
          double* work = new double[lwork];
          int* iwork = new int[8*size];
          int info = 0;
         
          for(int i = 0; i < size*size; i++){
            U[i] = 0.0;
            VT[i] = 0.0;
            x[i] = 0.0;
          }
          for(int i = 0; i < size; i++){
            x[i] = 1.0;
          }

       

          // FIRST TRY AT FORKING
          childpid = fork(); 


          if (childpid >= 0) /* fork succeeded */
          {
              if (childpid == 0) /* fork() returns 0 to the child process */
              {
                  printf("CHILD: I am the child process!\n");
                  printf("CHILD: Here's my PID: %d\n", getpid());
                  printf("CHILD: My parent's PID is: %d\n", getppid());

       

                // some simple threaded code within a fork
                #pragma omp parallel private (nThreads, myID)
                { // beginning of block
                  myID = omp_get_thread_num();
                  printf("CHILD fork: Hello I am thread %i\n", myID);
                  if (myID == 0)
                  {
                    nThreads = omp_get_num_threads();
                    printf("CHILD fork: I'm the boss (within the child) and control %i threads\n", nThreads);
                  }
                } // end of block

       

              }
              else /* fork() returns new pid to the parent process */
              {
                  printf("PARENT: I am the parent process!\n");
                  printf("PARENT: Here's my PID: %d\n", getpid());
                  printf("PARENT: The value of my copy of childpid is %d\n", childpid);
                  printf("PARENT: I will now wait for my child to exit.\n");
                  wait(&status); /* wait for child to exit, and store its status */
                  printf("PARENT: Child's exit code is: %d\n", WEXITSTATUS(status));
                  printf("PARENT: Goodbye!\n");            
                  exit(0);  /* parent exits */      
              }
          }   

       

          // now do some linear algebra

          dgesdd_(&jobz, &size, &size, x, &size, S, U, &size, VT, &size, work, &lwork, iwork, &info);

       

          // SECOND TRY AT FORKING

          childpid = fork();
         
          if (childpid >= 0) /* fork succeeded */
          {
              if (childpid == 0) /* fork() returns 0 to the child process */
              {
                  printf("CHILD: I am the child process!\n");
                  printf("CHILD: Here's my PID: %d\n", getpid());
                  printf("CHILD: My parent's PID is: %d\n", getppid());

       

                // some threaded code within a fork
                // code hangs in here for threads not numbered 0:
                #pragma omp parallel private (nThreads, myID)
                { // beginning of block
                  myID = omp_get_thread_num();
                  printf("CHILD fork: Hello I am thread %i\n", myID);
                  if (myID == 0)
                  {
                    nThreads = omp_get_num_threads();
                    printf("CHILD fork: I'm the boss (within the child) and control %i threads\n", nThreads);
                  }
                } // end of block

       

              }
              else /* fork() returns new pid to the parent process */
              {
                  printf("PARENT: I am the parent process!\n");
                  printf("PARENT: Here's my PID: %d\n", getpid());
                  printf("PARENT: The value of my copy of childpid is %d\n", childpid);
                  printf("PARENT: I will now wait for my child to exit.\n");
                  wait(&status); /* wait for child to exit, and store its status */
                  printf("PARENT: Child's exit code is: %d\n", WEXITSTATUS(status));
                  printf("PARENT: Goodbye!\n");            
                  exit(0);  /* parent exits */      
              }
          }

        return 0;
      }