[clang-tools-extra] [clang-tidy] Add documentation and smoke test for CUDA (PR #173699)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Dec 27 02:25:09 PST 2025
https://github.com/zeyi2 updated https://github.com/llvm/llvm-project/pull/173699
>From 1fe8fff7fdf129c117d86d4bac19c877032b5f3d Mon Sep 17 00:00:00 2001
From: mtx <mitchell.xu2 at gmail.com>
Date: Sat, 27 Dec 2025 12:12:09 +0800
Subject: [PATCH 1/3] [clang-tidy] Add documentation and smoke test for CUDA
---
clang-tools-extra/docs/clang-tidy/index.rst | 14 ++++++++++++++
.../test/clang-tidy/infrastructure/basic-cuda.cu | 9 +++++++++
2 files changed, 23 insertions(+)
create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst
index 34da529902308..38aabc77540cf 100644
--- a/clang-tools-extra/docs/clang-tidy/index.rst
+++ b/clang-tools-extra/docs/clang-tidy/index.rst
@@ -349,6 +349,20 @@ An overview of all the command-line options:
some-check.SomeOption: 'some value'
...
+Running Clang-Tidy on CUDA Files
+--------------------------------
+
+:program:`clang-tidy` supports analyzing CUDA source files.
+To correctly process host-side code, specify the CUDA toolkit path using
+``--cuda-path`` and limit compilation to the host with ``--cuda-host-only``.
+
+.. code-block:: console
+
+ $ clang-tidy source.cu -- --cuda-path=/path/to/cuda --cuda-host-only
+
+Using ``--cuda-host-only`` is recommended as it skips device-side compilation,
+speeding up the analysis and avoiding potential device-specific errors.
+
Clang-Tidy Automation
=====================
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
new file mode 100644
index 0000000000000..3bc605d864461
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
@@ -0,0 +1,9 @@
+// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- -nocudainc -nocudalib --cuda-host-only | FileCheck %s
+
+#define __global__ __attribute__((global))
+
+// CHECK: :[[@LINE+1]]:38: warning: use nullptr [modernize-use-nullptr]
+__global__ void kernel(int *p) { p = 0; }
+
+// CHECK: :[[@LINE+1]]:11: warning: use nullptr [modernize-use-nullptr]
+void *p = 0;
>From 13e3f45d598d5a62e475c0256b629a6e4fad103c Mon Sep 17 00:00:00 2001
From: mtx <mitchell.xu2 at gmail.com>
Date: Sat, 27 Dec 2025 17:53:47 +0800
Subject: [PATCH 2/3] Address review feedback
---
clang-tools-extra/docs/clang-tidy/index.rst | 17 +-
.../usr/local/cuda/include/cuda_runtime.h | 253 ++++++++++++++++++
.../clang-tidy/infrastructure/basic-cuda.cu | 11 +-
3 files changed, 273 insertions(+), 8 deletions(-)
create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h
diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst
index 38aabc77540cf..4a0bab8693878 100644
--- a/clang-tools-extra/docs/clang-tidy/index.rst
+++ b/clang-tools-extra/docs/clang-tidy/index.rst
@@ -352,16 +352,21 @@ An overview of all the command-line options:
Running Clang-Tidy on CUDA Files
--------------------------------
-:program:`clang-tidy` supports analyzing CUDA source files.
-To correctly process host-side code, specify the CUDA toolkit path using
-``--cuda-path`` and limit compilation to the host with ``--cuda-host-only``.
+:program:`clang-tidy` supports analyzing CUDA source files. To ensure correct
+header resolution, it is important to specify the CUDA toolkit path using
+``--cuda-path``. For more details on how Clang handles CUDA, see
+`Compiling CUDA with Clang <https://llvm.org/docs/CompileCudaWithLLVM.html>`_.
.. code-block:: console
- $ clang-tidy source.cu -- --cuda-path=/path/to/cuda --cuda-host-only
+ $ clang-tidy source.cu -- --cuda-path=/path/to/cuda
-Using ``--cuda-host-only`` is recommended as it skips device-side compilation,
-speeding up the analysis and avoiding potential device-specific errors.
+By default, :program:`clang-tidy` analyzes the host-side compilation. To
+analyze device-side code instead, pass the ``--cuda-device-only`` flag:
+
+.. code-block:: console
+
+ $ clang-tidy source.cu -- --cuda-path=/path/to/cuda --cuda-device-only
Clang-Tidy Automation
=====================
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h
new file mode 100644
index 0000000000000..421fa4dd7dbae
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h
@@ -0,0 +1,253 @@
+/* Minimal declarations for CUDA support. Testing purposes only. */
+
+#include <stddef.h>
+
+#if __HIP__ || __CUDA__
+#define __constant__ __attribute__((constant))
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __host__ __attribute__((host))
+#define __shared__ __attribute__((shared))
+#if __HIP__
+#define __managed__ __attribute__((managed))
+#endif
+#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
+#define __grid_constant__ __attribute__((grid_constant))
+#define __cluster_dims__(...) __attribute__((cluster_dims(__VA_ARGS__)))
+#define __no_cluster__ __attribute__((no_cluster))
+#else
+#define __constant__
+#define __device__
+#define __global__
+#define __host__
+#define __shared__
+#define __managed__
+#define __launch_bounds__(...)
+#define __grid_constant__
+#define __cluster_dims__(...)
+#define __no_cluster__
+#endif
+
+struct dim3 {
+ unsigned x, y, z;
+ __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
+};
+
+#if __HIP__ || HIP_PLATFORM
+typedef struct hipStream *hipStream_t;
+typedef enum hipError {} hipError_t;
+int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
+ hipStream_t stream = 0);
+extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ hipStream_t stream = 0);
+#ifndef __HIP_API_PER_THREAD_DEFAULT_STREAM__
+extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem,
+ hipStream_t stream);
+#else
+extern "C" hipError_t hipLaunchKernel_spt(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem,
+ hipStream_t stream);
+#endif // __HIP_API_PER_THREAD_DEFAULT_STREAM__
+#elif __OFFLOAD_VIA_LLVM__
+extern "C" unsigned __llvmPushCallConfiguration(dim3 gridDim, dim3 blockDim,
+ size_t sharedMem = 0, void *stream = 0);
+extern "C" unsigned llvmLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
+ void **args, size_t sharedMem = 0, void *stream = 0);
+#else
+typedef struct cudaStream *cudaStream_t;
+typedef enum cudaError {} cudaError_t;
+extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0);
+extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0);
+extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem, cudaStream_t stream);
+extern "C" cudaError_t cudaLaunchKernel_ptsz(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem, cudaStream_t stream);
+extern "C" __device__ cudaError_t cudaLaunchDevice(void *func,
+ void *parameterBuffer,
+ dim3 gridDim, dim3 blockDim,
+ unsigned int sharedMem,
+ cudaStream_t stream);
+extern "C" __device__ void *cudaGetParameterBuffer(size_t alignment,
+ size_t size);
+#endif
+
+extern "C" __device__ int printf(const char*, ...);
+
+struct char1 {
+ char x;
+ __host__ __device__ char1(char x = 0) : x(x) {}
+};
+struct char2 {
+ char x, y;
+ __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {}
+};
+struct char4 {
+ char x, y, z, w;
+ __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct uchar1 {
+ unsigned char x;
+ __host__ __device__ uchar1(unsigned char x = 0) : x(x) {}
+};
+struct uchar2 {
+ unsigned char x, y;
+ __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {}
+};
+struct uchar4 {
+ unsigned char x, y, z, w;
+ __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct short1 {
+ short x;
+ __host__ __device__ short1(short x = 0) : x(x) {}
+};
+struct short2 {
+ short x, y;
+ __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {}
+};
+struct short4 {
+ short x, y, z, w;
+ __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct ushort1 {
+ unsigned short x;
+ __host__ __device__ ushort1(unsigned short x = 0) : x(x) {}
+};
+struct ushort2 {
+ unsigned short x, y;
+ __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {}
+};
+struct ushort4 {
+ unsigned short x, y, z, w;
+ __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct int1 {
+ int x;
+ __host__ __device__ int1(int x = 0) : x(x) {}
+};
+struct int2 {
+ int x, y;
+ __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {}
+};
+struct int4 {
+ int x, y, z, w;
+ __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct uint1 {
+ unsigned x;
+ __host__ __device__ uint1(unsigned x = 0) : x(x) {}
+};
+struct uint2 {
+ unsigned x, y;
+ __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {}
+};
+struct uint3 {
+ unsigned x, y, z;
+ __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {}
+};
+struct uint4 {
+ unsigned x, y, z, w;
+ __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct longlong1 {
+ long long x;
+ __host__ __device__ longlong1(long long x = 0) : x(x) {}
+};
+struct longlong2 {
+ long long x, y;
+ __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {}
+};
+struct longlong4 {
+ long long x, y, z, w;
+ __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct ulonglong1 {
+ unsigned long long x;
+ __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {}
+};
+struct ulonglong2 {
+ unsigned long long x, y;
+ __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {}
+};
+struct ulonglong4 {
+ unsigned long long x, y, z, w;
+ __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct float1 {
+ float x;
+ __host__ __device__ float1(float x = 0) : x(x) {}
+};
+struct float2 {
+ float x, y;
+ __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {}
+};
+struct float4 {
+ float x, y, z, w;
+ __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct double1 {
+ double x;
+ __host__ __device__ double1(double x = 0) : x(x) {}
+};
+struct double2 {
+ double x, y;
+ __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {}
+};
+struct double4 {
+ double x, y, z, w;
+ __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+typedef unsigned long long cudaTextureObject_t;
+typedef unsigned long long cudaSurfaceObject_t;
+
+enum cudaTextureReadMode {
+ cudaReadModeNormalizedFloat,
+ cudaReadModeElementType
+};
+
+enum cudaSurfaceBoundaryMode {
+ cudaBoundaryModeZero,
+ cudaBoundaryModeClamp,
+ cudaBoundaryModeTrap
+};
+
+enum {
+ cudaTextureType1D,
+ cudaTextureType2D,
+ cudaTextureType3D,
+ cudaTextureTypeCubemap,
+ cudaTextureType1DLayered,
+ cudaTextureType2DLayered,
+ cudaTextureTypeCubemapLayered
+};
+
+struct textureReference { };
+template <class T, int texType = cudaTextureType1D,
+ enum cudaTextureReadMode mode = cudaReadModeElementType>
+struct __attribute__((device_builtin_texture_type)) texture
+ : public textureReference {};
+
+struct surfaceReference { int desc; };
+
+template <typename T, int dim = 1>
+struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {};
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
index 3bc605d864461..37b3b5ab7ade6 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
@@ -1,6 +1,13 @@
-// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- -nocudainc -nocudalib --cuda-host-only | FileCheck %s
+// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- \
+// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA/usr/local/cuda/include \
+// RUN: --cuda-host-only | FileCheck %s
+// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- \
+// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA/usr/local/cuda/include \
+// RUN: --cuda-device-only | FileCheck %s
-#define __global__ __attribute__((global))
+#include <cuda_runtime.h>
// CHECK: :[[@LINE+1]]:38: warning: use nullptr [modernize-use-nullptr]
__global__ void kernel(int *p) { p = 0; }
>From 05c0eb5d389cad87e6c6ff680801e1bb1e1d85ad Mon Sep 17 00:00:00 2001
From: mtx <mitchell.xu2 at gmail.com>
Date: Sat, 27 Dec 2025 18:21:41 +0800
Subject: [PATCH 3/3] Cleanup
---
.../CUDA/{usr/local/cuda/include => }/cuda_runtime.h | 0
.../test/clang-tidy/infrastructure/basic-cuda.cu | 8 ++++----
2 files changed, 4 insertions(+), 4 deletions(-)
rename clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/{usr/local/cuda/include => }/cuda_runtime.h (100%)
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/cuda_runtime.h
similarity index 100%
rename from clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h
rename to clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/cuda_runtime.h
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
index 37b3b5ab7ade6..db0260b9bc5d8 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
@@ -1,10 +1,10 @@
// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- \
-// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
-// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA/usr/local/cuda/include \
+// RUN: --cuda-path=%S/Inputs/CUDA \
+// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA \
// RUN: --cuda-host-only | FileCheck %s
// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- \
-// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
-// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA/usr/local/cuda/include \
+// RUN: --cuda-path=%S/Inputs/CUDA \
+// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA \
// RUN: --cuda-device-only | FileCheck %s
#include <cuda_runtime.h>
More information about the cfe-commits mailing list