jch

Using MPI(mpich2) with -machinefile, CAL can't find GPU Devices.

Discussion created by jch on May 24, 2011
Latest reply on May 27, 2011 by jch
windows 7(x64), SDK 2.4, catalyst 11.5, mpich2-1.3.2p1

Hi!
I am writing a CAL-MPI program that processes data in parallel on a GPU cluster. My problem is that mpiexec with the -localonly option finds the devices, but mpiexec with the -machinefile option cannot find any devices.

My code and the results of running it are provided below. How can MPI find the GPU devices with the -machinefile option?

[Result 1]
C:\mpi_test>mpiexec -localonly -n 2 mpi_test.exe
numDevices = 00000004
numDevices = 00000004

[Result 2]
C:\mpi_test>mpiexec -n 2 mpi_test.exe
numDevices = 00000000
numDevices = 00000000

[Result 3]
C:\mpi_test>mpiexec -machinefile host.txt -n 2 mpi_test.exe
numDevices = 00000000
numDevices = 00000000

[host.txt] file
----------------
localhost:2
----------------

#include <stdio.h> #include <mpi.h> #include "cal.h" #include "calcl.h" #include <windows.h> #define MASTER_NODE 0 CALAPI CALresult (CALAPIENTRY *calInit)(void); CALAPI CALresult (CALAPIENTRY *calShutdown)(void); CALAPI CALresult (CALAPIENTRY *calDeviceGetCount)(CALuint* count); int LoadLibraryCal() { HINSTANCE hDLL; if((hDLL = LoadLibraryA("aticalrt64.dll")) == 0) return FALSE; if((calInit = (CALresult (__cdecl *)(void)) GetProcAddress(hDLL, "calInit")) == 0) return FALSE; if((calShutdown = (CALresult (__cdecl *)(void))GetProcAddress(hDLL, "calShutdown")) == 0) return FALSE; if((calDeviceGetCount = (CALresult (__cdecl *)(CALuint *))GetProcAddress(hDLL, "calDeviceGetCount")) == 0) return FALSE; return TRUE; } int main(int argc, char *argv[]) { int size_mpi, rank; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &size_mpi); MPI_Comm_rank(MPI_COMM_WORLD, &rank); CALuint numDevices = 0; if(LoadLibraryCal()==FALSE) return -1; if(calInit()!=CAL_RESULT_OK) return -1; if ( rank == MASTER_NODE ) { if(calDeviceGetCount(&numDevices)!=CAL_RESULT_OK) return -1; printf("numDevices = %08x\n",numDevices); } else { if(calDeviceGetCount(&numDevices)!=CAL_RESULT_OK) return -1; printf("numDevices = %08x\n",numDevices); } MPI_Finalize(); calShutdown(); return 0; }

Outcomes