6 Replies Latest reply on May 27, 2011 2:38 AM by jch

    Using MPI(mpich2) with -machinefile, CAL can't find GPU Devices.

    jch
      windows 7(x64), SDK 2.4, catalyst 11.5, mpich2-1.3.2p1

      Hi!
      I am writing a CAL-MPI program that runs in parallel on a GPU cluster. My problem is that 'mpiexec' with the '-localonly' option finds the devices, but 'mpiexec' with the '-machinefile' option can't find them.

      My code and its output are shown below. How can MPI find the GPU devices with the -machinefile option?

      [Result 1]
      C:\mpi_test>mpiexec -localonly -n 2 mpi_test.exe
      numDevices = 00000004
      numDevices = 00000004

      [Result 2]
      C:\mpi_test>mpiexec -n 2 mpi_test.exe
      numDevices = 00000000
      numDevices = 00000000

      [Result 3]
      C:\mpi_test>mpiexec -machinefile host.txt -n 2 mpi_test.exe
      numDevices = 00000000
      numDevices = 00000000

      [host.txt] file
      ----------------
      localhost:2
      ----------------

      #include <stdio.h> #include <mpi.h> #include "cal.h" #include "calcl.h" #include <windows.h> #define MASTER_NODE 0 CALAPI CALresult (CALAPIENTRY *calInit)(void); CALAPI CALresult (CALAPIENTRY *calShutdown)(void); CALAPI CALresult (CALAPIENTRY *calDeviceGetCount)(CALuint* count); int LoadLibraryCal() { HINSTANCE hDLL; if((hDLL = LoadLibraryA("aticalrt64.dll")) == 0) return FALSE; if((calInit = (CALresult (__cdecl *)(void)) GetProcAddress(hDLL, "calInit")) == 0) return FALSE; if((calShutdown = (CALresult (__cdecl *)(void))GetProcAddress(hDLL, "calShutdown")) == 0) return FALSE; if((calDeviceGetCount = (CALresult (__cdecl *)(CALuint *))GetProcAddress(hDLL, "calDeviceGetCount")) == 0) return FALSE; return TRUE; } int main(int argc, char *argv[]) { int size_mpi, rank; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &size_mpi); MPI_Comm_rank(MPI_COMM_WORLD, &rank); CALuint numDevices = 0; if(LoadLibraryCal()==FALSE) return -1; if(calInit()!=CAL_RESULT_OK) return -1; if ( rank == MASTER_NODE ) { if(calDeviceGetCount(&numDevices)!=CAL_RESULT_OK) return -1; printf("numDevices = %08x\n",numDevices); } else { if(calDeviceGetCount(&numDevices)!=CAL_RESULT_OK) return -1; printf("numDevices = %08x\n",numDevices); } MPI_Finalize(); calShutdown(); return 0; }