Problems creating green context

Hello,
I’m running my first tests with the Green Context technology to familiarize myself. I’ve created a simple example that does the following:

  1. Initializes CUDA and obtains the first available device.
    
  2. Queries how many SMs (Streaming Multiprocessors) the GPU has.
    
  3. Allocates memory on the GPU.
    
  4. Launches a very simple kernel that prints which block it's running in.
    
  5. Synchronizes, frees the memory, and destroys the CUDA context.
    

🖥️ System Info:

  • GPU: NVIDIA Orin (nvgpu)
  • CUDA Version: 12.6
  • Driver Version: 540.4.0
  • OS: Ubuntu 20.04
  • Compiler: nvcc from CUDA 12.6

I attached the code:

#include <iostream>
#include <cuda_runtime.h>
#include <cuda.h>
#include <cuda/experimental/green_context.cuh>
#include <cuda_runtime_api.h>
#include <cstring>  // For memset

using namespace std;

// Simple kernel to test execution
__global__ void simple_kernel(CUdeviceptr d_ptr) {
    printf("Executing kernel on SM %d\n", blockIdx.x);
}

int main() {
    CUresult res;
    CUdevice device;
    CUgreenCtx green_ctx = nullptr;  // Make sure it's properly initialized
    CUcontext normal_ctx = nullptr;
    CUdeviceptr d_ptr;
    size_t size = 1024;
    CUdevResourceDesc desc;
    memset(&desc, 0, sizeof(desc));
    unsigned int falgs = 0;

    // Initialize CUDA Driver API
    res = cuInit(0);
    if (res != CUDA_SUCCESS) {
        cerr << "CUDA Initialization failed!" << endl;
        return -1;
    }

    // Check for available CUDA devices
    int device_count;
    res = cuDeviceGetCount(&device_count);
    if (device_count == 0) {
        cerr << "No CUDA devices found!" << endl;
        return -1;
    }
    cout << "Found " << device_count << " CUDA device(s)." << endl;

    // Get the first CUDA device
    res = cuDeviceGet(&device, 0);
    if (res != CUDA_SUCCESS) {
        cerr << "Failed to get CUDA device!" << endl;
        return -1;
    }

    // Query some device attributes
    int sm_count;
    res = cuDeviceGetAttribute(&sm_count, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device);
    if (res != CUDA_SUCCESS) {
        cerr << "Failed to get device attributes!" << endl;
        return -1;
    }
    cout << "Device has " << sm_count << " Streaming Multiprocessors." << endl;

    // Initialize the resource descriptor

    // If the documentation or header defines a version field, initialize it, for example:
    // desc.version = CU_DEV_RESOURCE_DESC_VERSION;

    // Try to create a green context using a valid scheduling flag
    res = cuGreenCtxCreate(&green_ctx, desc, device, falgs);
    if (res != CUDA_SUCCESS) {
        const char *err_str;
        cuGetErrorString(res, &err_str);
        cerr << "Error creating green context: " << err_str << endl;
        cerr << "Falling back to default CUDA context creation." << endl;
        // If green context creation fails, create a normal CUDA context
        res = cuCtxCreate(&normal_ctx, CU_CTX_SCHED_AUTO, device);
        if (res != CUDA_SUCCESS) {
            cerr << "Failed to create default CUDA context!" << endl;
            return -1;
        }
    } else {
        cout << "Green context created successfully!" << endl;
        // Convert CUgreenCtx to CUcontext to use it in subsequent calls
        normal_ctx = reinterpret_cast<CUcontext>(green_ctx);
    }

    // Set the current context
    res = cuCtxSetCurrent(normal_ctx);
    if (res != CUDA_SUCCESS) {
        cerr << "Failed to set the current context!" << endl;
        return -1;
    }

    // Allocate memory on the device
    res = cuMemAlloc(&d_ptr, size);
    if (res != CUDA_SUCCESS) {
        cerr << "Failed to allocate device memory!" << endl;
        return -1;
    }

    // Launch a simple kernel passing the memory pointer
    simple_kernel<<<1, 1>>>(d_ptr);

    // Synchronize to wait for kernel execution to complete
    res = cuCtxSynchronize();
    if (res != CUDA_SUCCESS) {
        cerr << "Failed to synchronize CUDA context!" << endl;
        return -1;
    }

    // Free device memory
    res = cuMemFree(d_ptr);
    if (res != CUDA_SUCCESS) {
        cerr << "Failed to free device memory!" << endl;
        return -1;
    }

    // Destroy the CUDA context
    res = cuCtxDestroy_v2(normal_ctx);
    if (res != CUDA_SUCCESS) {
        cerr << "Failed to destroy the context!" << endl;
        return -1;
    }

    cout << "Context destroyed successfully!" << endl;
    return 0;
}

After compiling and running it, I get the following error when creating the green context, indicating that the parameters I’m passing to create the context are incorrect, and I don’t know what it’s due to.

Found 1 CUDA device(s).
Device has 8 Streaming Multiprocessors.
Error creating green context: invalid argument
Falling back to default CUDA context creation.
Executing kernel on SM 0
Context destroyed successfully!

You don’t seem to have followed the recipe here.

Here is an example that runs without any runtime errors on CUDA 12.8

1 Like

Hi Robert,

First of all, thank you very much for your response. I tried the thing you linked, and it seems to create the context correctly, but I get an error in the device context:

Testing the compatibility of CUDA streams with green context and CUDA Async Memcopy
CUDA error in file ‘main.cu’ at line 62: invalid device context

I guess the link is what I would expect; on the contrary, the example I found here did work for me, so thanks again.

Yes, in fact that is mentioned in that thread, and it is declared fixed in CUDA 12.8. So when you run on CUDA 12.6 it is expected that you will get that error, just as described in that thread. As I indicated above:

1 Like

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.