AnsweredAssumed Answered

call to ACML linear algebra routine causes subsequent threaded code within a fork to hang

Question asked by paciorek on Jul 2, 2013
Latest reply on Jul 3, 2013 by paciorek

Hi, When I call some ACML linear algebra routines (in particular DGESDD, but also DGESV (I believe) and possibly others), they cause subsequent threaded code within a fork to hang when OMP_NUM_THREADS is greater than 1. If I just call the threaded code within a fork, without having the initial ACML call I don't see the problem. We're running Ubuntu 12.04 with ACML 5.2, in particular gfortran64_fma4_mp on AMD Opteron(TM) Processor 6272. We see the same problem with gfortran64_mp on Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz. Here's some example C++ (really mostly just C) code. The first fork with embedded threaded code works fine, but then after calling DGESDD the subsequent identical fork with embedded threaded code fails.

 


#include <unistd.h>    
#include <sys/types.h> 
#include <errno.h>     
#include <stdio.h>     
#include <sys/wait.h>  
#include <stdlib.h>    
#include <omp.h>

 

// compile with:  g++ testThrSVD.cpp  -o testThrSVD -L/usr/local/lib -fopenmp  -llapack -lblas
extern "C" int dgesdd_(char* jobz, int* m, int* n, double* A, int* lda, double* S, double* U, int* ldu, double* VT, int* LDVT, double* work, int* lwork, int* iwork, int* info);

 

int main()
{
   pid_t childpid; /* variable to store the child's pid */
    int retval;     /* child process: user-provided return code */
    int status;     /* parent process: child's exit status */

 

    int nThreads, myID;
    double* input;

 

    // set-up stuff for the linear algebra
    int size = 3;
    char jobz = 'A';
    double* x = new double[size*size];
    double* U = new double[size*size];
    double* VT = new double[size*size];
    double* S = new double[size];
    int lwork = 3*size + 4*size*size+4*size;
    double* work = new double[lwork];
    int* iwork = new int[8*size];
    int info = 0;
   
    for(int i = 0; i < size*size; i++){
      U[i] = 0.0;
      VT[i] = 0.0;
      x[i] = 0.0;
    }
    for(int i = 0; i < size; i++){
      x[i] = 1.0;
    }

 

    // FIRST TRY AT FORKING
    childpid = fork(); 


    if (childpid >= 0) /* fork succeeded */
    {
        if (childpid == 0) /* fork() returns 0 to the child process */
        {
            printf("CHILD: I am the child process!\n");
            printf("CHILD: Here's my PID: %d\n", getpid());
            printf("CHILD: My parent's PID is: %d\n", getppid());

 

          // some simple threaded code within a fork
          #pragma omp parallel private (nThreads, myID)
          { // beginning of block
            myID = omp_get_thread_num();
            printf("CHILD fork: Hello I am thread %i\n", myID);
            if (myID == 0)
            {
              nThreads = omp_get_num_threads();
              printf("CHILD fork: I'm the boss (within the child) and control %i threads\n", nThreads);
            }
          } // end of block

 

        }
        else /* fork() returns new pid to the parent process */
        {
            printf("PARENT: I am the parent process!\n");
            printf("PARENT: Here's my PID: %d\n", getpid());
            printf("PARENT: The value of my copy of childpid is %d\n", childpid);
            printf("PARENT: I will now wait for my child to exit.\n");
            wait(&status); /* wait for child to exit, and store its status */
            printf("PARENT: Child's exit code is: %d\n", WEXITSTATUS(status));
            printf("PARENT: Goodbye!\n");            
            exit(0);  /* parent exits */      
        }
    }   

 

    // now do some linear algebra

    dgesdd_(&jobz, &size, &size, x, &size, S, U, &size, VT, &size, work, &lwork, iwork, &info);

 

    // SECOND TRY AT FORKING

    childpid = fork();
   
    if (childpid >= 0) /* fork succeeded */
    {
        if (childpid == 0) /* fork() returns 0 to the child process */
        {
            printf("CHILD: I am the child process!\n");
            printf("CHILD: Here's my PID: %d\n", getpid());
            printf("CHILD: My parent's PID is: %d\n", getppid());

 

          // some threaded code within a fork
          // code hangs in here for threads not numbered 0:
          #pragma omp parallel private (nThreads, myID)
          { // beginning of block
            myID = omp_get_thread_num();
            printf("CHILD fork: Hello I am thread %i\n", myID);
            if (myID == 0)
            {
              nThreads = omp_get_num_threads();
              printf("CHILD fork: I'm the boss (within the child) and control %i threads\n", nThreads);
            }
          } // end of block

 

        }
        else /* fork() returns new pid to the parent process */
        {
            printf("PARENT: I am the parent process!\n");
            printf("PARENT: Here's my PID: %d\n", getpid());
            printf("PARENT: The value of my copy of childpid is %d\n", childpid);
            printf("PARENT: I will now wait for my child to exit.\n");
            wait(&status); /* wait for child to exit, and store its status */
            printf("PARENT: Child's exit code is: %d\n", WEXITSTATUS(status));
            printf("PARENT: Goodbye!\n");            
            exit(0);  /* parent exits */      
        }
    }

  return 0;
}

Outcomes