Published on

Hello World in CUDA

  • CUDA hello world example
#include <stdio.h>
#include <cuda_runtime.h>

/**
 * Prints one greeting line per thread, tagged with the thread's block
 * index and its index within the block (x dimension only).
 *
 * Takes no arguments and touches no memory; it relies solely on
 * device-side printf.
 */
__global__ void helloWorldKernel() {
    // Capture the built-in indices in named locals before formatting.
    const unsigned int blockId = blockIdx.x;
    const unsigned int threadId = threadIdx.x;

    printf("Hello, World! from block %d, thread %d\n", blockId, threadId);
}

/**
 * Host entry point.
 *
 * Verifies that at least one CUDA device is present, prints the name of
 * device 0, launches helloWorldKernel with 1 block of 5 threads, waits for
 * it to finish, and resets the device.
 *
 * Returns 0 on success, or 1 if any CUDA runtime call reports an error or
 * no CUDA-capable device is found. Every runtime call's status is checked
 * so a failure is reported where it happens rather than surfacing later.
 */
int main() {
    // Check if CUDA is available
    int deviceCount = 0;
    cudaError_t err = cudaGetDeviceCount(&deviceCount);
    if (err != cudaSuccess) {
        printf("CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }
    if (deviceCount == 0) {
        printf("No CUDA-capable devices found\n");
        return 1;
    }

    printf("Found %d CUDA device(s)\n", deviceCount);

    // Get device properties; on failure deviceProp would be left
    // unpopulated, so do not print from it without checking the status.
    cudaDeviceProp deviceProp;
    err = cudaGetDeviceProperties(&deviceProp, 0);
    if (err != cudaSuccess) {
        printf("CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }
    printf("Using device: %s\n", deviceProp.name);

    // Launch the kernel with 1 block and 5 threads
    helloWorldKernel<<<1, 5>>>();

    // A kernel launch returns no status itself; launch-configuration
    // errors are reported through cudaGetLastError().
    err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("CUDA kernel launch error: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // Synchronize to ensure kernel execution completes; errors raised
    // while the kernel was running are reported here.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        printf("CUDA kernel error: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // Reset the device
    err = cudaDeviceReset();
    if (err != cudaSuccess) {
        printf("CUDA error: %s\n", cudaGetErrorString(err));
        return 1;
    }

    printf("Program completed successfully\n");
    return 0;
}

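Output (this listing shows two blocks of five threads, i.e. a `helloWorldKernel<<<2, 5>>>()` launch; the `<<<1, 5>>>` launch in the code above prints only the "block 0" lines)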

nvcc warning : Support for offline compilation for architectures prior to '<compute/sm/lto>_75' will be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).

Found 1 CUDA device(s)
Using device: NVIDIA GeForce MX150
Hello, World! from block 0, thread 0
Hello, World! from block 0, thread 1
Hello, World! from block 0, thread 2
Hello, World! from block 0, thread 3
Hello, World! from block 0, thread 4
Hello, World! from block 1, thread 0
Hello, World! from block 1, thread 1
Hello, World! from block 1, thread 2
Hello, World! from block 1, thread 3
Hello, World! from block 1, thread 4
Program completed successfully

THE END