Hello,
I’m running my first tests with the Green Context technology to familiarize myself. I’ve created a simple example that does the following:
-
Initializes CUDA and obtains the first available device.
-
Queries how many SMs (Streaming Multiprocessors) the GPU has.
-
Allocates memory on the GPU.
-
Launches a very simple kernel that prints which block it's running in.
-
Synchronizes, frees the memory, and destroys the CUDA context.
🖥️ System Info:
- GPU: NVIDIA Orin (nvgpu)
- CUDA Version: 12.6
- Driver Version: 540.4.0
- OS: Ubuntu 20.04
- Compiler:
nvcc
from CUDA 12.6
I attached the code:
#include <iostream>
#include <cuda_runtime.h>
#include <cuda.h>
#include <cuda/experimental/green_context.cuh>
#include <cuda_runtime_api.h>
#include <cstring> // For memset
using namespace std;
// Simple kernel to test execution
__global__ void simple_kernel(CUdeviceptr d_ptr) {
printf("Executing kernel on SM %d\n", blockIdx.x);
}
int main() {
CUresult res;
CUdevice device;
CUgreenCtx green_ctx = nullptr; // Make sure it's properly initialized
CUcontext normal_ctx = nullptr;
CUdeviceptr d_ptr;
size_t size = 1024;
CUdevResourceDesc desc;
memset(&desc, 0, sizeof(desc));
unsigned int falgs = 0;
// Initialize CUDA Driver API
res = cuInit(0);
if (res != CUDA_SUCCESS) {
cerr << "CUDA Initialization failed!" << endl;
return -1;
}
// Check for available CUDA devices
int device_count;
res = cuDeviceGetCount(&device_count);
if (device_count == 0) {
cerr << "No CUDA devices found!" << endl;
return -1;
}
cout << "Found " << device_count << " CUDA device(s)." << endl;
// Get the first CUDA device
res = cuDeviceGet(&device, 0);
if (res != CUDA_SUCCESS) {
cerr << "Failed to get CUDA device!" << endl;
return -1;
}
// Query some device attributes
int sm_count;
res = cuDeviceGetAttribute(&sm_count, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device);
if (res != CUDA_SUCCESS) {
cerr << "Failed to get device attributes!" << endl;
return -1;
}
cout << "Device has " << sm_count << " Streaming Multiprocessors." << endl;
// Initialize the resource descriptor
// If the documentation or header defines a version field, initialize it, for example:
// desc.version = CU_DEV_RESOURCE_DESC_VERSION;
// Try to create a green context using a valid scheduling flag
res = cuGreenCtxCreate(&green_ctx, desc, device, falgs);
if (res != CUDA_SUCCESS) {
const char *err_str;
cuGetErrorString(res, &err_str);
cerr << "Error creating green context: " << err_str << endl;
cerr << "Falling back to default CUDA context creation." << endl;
// If green context creation fails, create a normal CUDA context
res = cuCtxCreate(&normal_ctx, CU_CTX_SCHED_AUTO, device);
if (res != CUDA_SUCCESS) {
cerr << "Failed to create default CUDA context!" << endl;
return -1;
}
} else {
cout << "Green context created successfully!" << endl;
// Convert CUgreenCtx to CUcontext to use it in subsequent calls
normal_ctx = reinterpret_cast<CUcontext>(green_ctx);
}
// Set the current context
res = cuCtxSetCurrent(normal_ctx);
if (res != CUDA_SUCCESS) {
cerr << "Failed to set the current context!" << endl;
return -1;
}
// Allocate memory on the device
res = cuMemAlloc(&d_ptr, size);
if (res != CUDA_SUCCESS) {
cerr << "Failed to allocate device memory!" << endl;
return -1;
}
// Launch a simple kernel passing the memory pointer
simple_kernel<<<1, 1>>>(d_ptr);
// Synchronize to wait for kernel execution to complete
res = cuCtxSynchronize();
if (res != CUDA_SUCCESS) {
cerr << "Failed to synchronize CUDA context!" << endl;
return -1;
}
// Free device memory
res = cuMemFree(d_ptr);
if (res != CUDA_SUCCESS) {
cerr << "Failed to free device memory!" << endl;
return -1;
}
// Destroy the CUDA context
res = cuCtxDestroy_v2(normal_ctx);
if (res != CUDA_SUCCESS) {
cerr << "Failed to destroy the context!" << endl;
return -1;
}
cout << "Context destroyed successfully!" << endl;
return 0;
}
After compiling and running it, I get the following error when creating the green context, indicating that the parameters I’m passing to create the context are incorrect, and I don’t know what it’s due to.
Found 1 CUDA device(s).
Device has 8 Streaming Multiprocessors.
Error creating green context: invalid argument
Falling back to default CUDA context creation.
Executing kernel on SM 0
Context destroyed successfully!