Does anyone have any experience on two GPUs programming using CUDA+openMP?
Im having a trouble in my case:
#include <stdio.h>
#include <omp.h>
#include <stdlib.h>
static void HandleError(cudaError_t err, const char *file, int line){
if(err!=cudaSuccess){
printf("%s in %s at line %d
", cudaGetErrorString(err), file, line);
exit(EXIT_FAILURE);
}
}
#define HANDLE_ERROR(err)(HandleError(err, __FILE__, __LINE__))
__global__ void Test(int *x){
x[threadIdx.x]=1;
}
int main(){
int *x;
int iam;
omp_set_num_threads(2);
#pragma omp parallel private(x, iam)
{
iam=omp_get_thread_num();
cudaSetDevice(iam);
HANDLE_ERROR(cudaMalloc((void**)&x, 100*sizeof(int)));
#pragma omp barrier
if(iam==0){
Test<<1,100>>(x);
}
#pragma barrier
cudaDeviceSynchronize();
if(iam==1){
HANDLE_ERROR(cudaMemcpyPeer(x, 1, x, 0, 100*sizeof(int)));
}
#pragma omp barrier
}
cudaFree(x);
return 0;
}
Above is a simple test case I wrote. It’s just creating two threads, and grant controls of each GPU to each thread, repectively. Then GPU1 writes something into memory through kernel launch, and then GPU2 copy it back to its own memory using cudaMemcpyPeer. My platform is Tesla C2050 + GTS430, both are Fermi GPUs. I have to use cudaMemcpyPeer to recude data copy overhead. When I’m running the program, it shows me invalid argument, and cuda-gdb shows invalid device ordinal. Anybody has a clue on this will be much appreciated. Thank you!