- Published on
Hello World in CUDA
- CUDA hello world example
#include <stdio.h>
#include <cuda_runtime.h>
/*
 * helloWorldKernel - every launched thread prints one greeting line.
 *
 * Launch layout: only the x components of blockIdx and threadIdx are read,
 * so the kernel is meant for a 1-D grid of 1-D blocks. It takes no
 * arguments and touches no memory; its sole effect is the device-side
 * printf below.
 */
__global__ void helloWorldKernel() {
    // Snapshot the built-in indices so the printf call reads cleanly.
    const unsigned int blockId = blockIdx.x;
    const unsigned int threadId = threadIdx.x;

    printf("Hello, World! from block %d, thread %d\n", blockId, threadId);
}
/*
 * Report a failed CUDA runtime call on stderr.
 *
 * label  - short prefix describing what failed (e.g. "CUDA error").
 * status - value returned by the CUDA runtime call.
 *
 * Returns true when status is cudaSuccess, false otherwise (after printing
 * "<label>: <cudaGetErrorString(status)>").
 */
static bool cudaSucceeded(const char *label, cudaError_t status) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "%s: %s\n", label, cudaGetErrorString(status));
    return false;
}

/*
 * Entry point: verifies that a CUDA device is present, prints the name of
 * device 0, launches helloWorldKernel with 1 block of 5 threads, and waits
 * for it to finish.
 *
 * Returns 0 on success and 1 if no device is found or any CUDA runtime
 * call fails. Diagnostics are written to stderr; normal progress messages
 * go to stdout.
 */
int main() {
    // Launch configuration: 1 block of 5 threads.
    const int kBlocks = 1;
    const int kThreadsPerBlock = 5;

    // Check if CUDA is available
    int deviceCount = 0;
    if (!cudaSucceeded("CUDA error", cudaGetDeviceCount(&deviceCount))) {
        return 1;
    }
    if (deviceCount == 0) {
        fprintf(stderr, "No CUDA-capable devices found\n");
        return 1;
    }
    printf("Found %d CUDA device(s)\n", deviceCount);

    // Get device properties; only read deviceProp if the query succeeded,
    // otherwise the struct is uninitialized.
    cudaDeviceProp deviceProp;
    if (!cudaSucceeded("CUDA error", cudaGetDeviceProperties(&deviceProp, 0))) {
        return 1;
    }
    printf("Using device: %s\n", deviceProp.name);

    // Launch the kernel
    helloWorldKernel<<<kBlocks, kThreadsPerBlock>>>();

    // A kernel launch returns nothing itself: launch-time failures (invalid
    // configuration, no kernel image for this GPU architecture, ...) must be
    // fetched explicitly with cudaGetLastError().
    if (!cudaSucceeded("CUDA kernel launch error", cudaGetLastError())) {
        return 1;
    }

    // Synchronize to ensure kernel execution completes; this also surfaces
    // errors raised while the kernel was running.
    if (!cudaSucceeded("CUDA kernel error", cudaDeviceSynchronize())) {
        return 1;
    }

    // Reset the device
    if (!cudaSucceeded("CUDA error", cudaDeviceReset())) {
        return 1;
    }

    printf("Program completed successfully\n");
    return 0;
}
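Output (note: the first line is a compile-time warning from nvcc, not program output. The sample below contains lines from block 1, so it was captured from a run that launched two blocks of 5 threads; the listing above launches a single block of 5 threads and therefore prints only the five "block 0" lines.)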
nvcc warning : Support for offline compilation for architectures prior to '<compute/sm/lto>_75' will be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
Found 1 CUDA device(s)
Using device: NVIDIA GeForce MX150
Hello, World! from block 0, thread 0
Hello, World! from block 0, thread 1
Hello, World! from block 0, thread 2
Hello, World! from block 0, thread 3
Hello, World! from block 0, thread 4
Hello, World! from block 1, thread 0
Hello, World! from block 1, thread 1
Hello, World! from block 1, thread 2
Hello, World! from block 1, thread 3
Hello, World! from block 1, thread 4
Program completed successfully
THE END