[clang-tools-extra] [clang-tidy] Add documentation and smoke test for CUDA (PR #173699)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Dec 28 02:13:35 PST 2025
https://github.com/zeyi2 updated https://github.com/llvm/llvm-project/pull/173699
>From 1fe8fff7fdf129c117d86d4bac19c877032b5f3d Mon Sep 17 00:00:00 2001
From: mtx <mitchell.xu2 at gmail.com>
Date: Sat, 27 Dec 2025 12:12:09 +0800
Subject: [PATCH 1/4] [clang-tidy] Add documentation and smoke test for CUDA
---
clang-tools-extra/docs/clang-tidy/index.rst | 14 ++++++++++++++
.../test/clang-tidy/infrastructure/basic-cuda.cu | 9 +++++++++
2 files changed, 23 insertions(+)
create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst
index 34da529902308..38aabc77540cf 100644
--- a/clang-tools-extra/docs/clang-tidy/index.rst
+++ b/clang-tools-extra/docs/clang-tidy/index.rst
@@ -349,6 +349,20 @@ An overview of all the command-line options:
some-check.SomeOption: 'some value'
...
+Running Clang-Tidy on CUDA Files
+--------------------------------
+
+:program:`clang-tidy` supports analyzing CUDA source files.
+To correctly process host-side code, specify the CUDA toolkit path using
+``--cuda-path`` and limit compilation to the host with ``--cuda-host-only``.
+
+.. code-block:: console
+
+ $ clang-tidy source.cu -- --cuda-path=/path/to/cuda --cuda-host-only
+
+Using ``--cuda-host-only`` is recommended as it skips device-side compilation,
+speeding up the analysis and avoiding potential device-specific errors.
+
Clang-Tidy Automation
=====================
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
new file mode 100644
index 0000000000000..3bc605d864461
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
@@ -0,0 +1,9 @@
+// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- -nocudainc -nocudalib --cuda-host-only | FileCheck %s
+
+#define __global__ __attribute__((global))
+
+// CHECK: :[[@LINE+1]]:38: warning: use nullptr [modernize-use-nullptr]
+__global__ void kernel(int *p) { p = 0; }
+
+// CHECK: :[[@LINE+1]]:11: warning: use nullptr [modernize-use-nullptr]
+void *p = 0;
>From 13e3f45d598d5a62e475c0256b629a6e4fad103c Mon Sep 17 00:00:00 2001
From: mtx <mitchell.xu2 at gmail.com>
Date: Sat, 27 Dec 2025 17:53:47 +0800
Subject: [PATCH 2/4] Address review feedback
---
clang-tools-extra/docs/clang-tidy/index.rst | 17 +-
.../usr/local/cuda/include/cuda_runtime.h | 253 ++++++++++++++++++
.../clang-tidy/infrastructure/basic-cuda.cu | 11 +-
3 files changed, 273 insertions(+), 8 deletions(-)
create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h
diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst
index 38aabc77540cf..4a0bab8693878 100644
--- a/clang-tools-extra/docs/clang-tidy/index.rst
+++ b/clang-tools-extra/docs/clang-tidy/index.rst
@@ -352,16 +352,21 @@ An overview of all the command-line options:
Running Clang-Tidy on CUDA Files
--------------------------------
-:program:`clang-tidy` supports analyzing CUDA source files.
-To correctly process host-side code, specify the CUDA toolkit path using
-``--cuda-path`` and limit compilation to the host with ``--cuda-host-only``.
+:program:`clang-tidy` supports analyzing CUDA source files. To ensure correct
+header resolution, it is important to specify the CUDA toolkit path using
+``--cuda-path``. For more details on how Clang handles CUDA, see
+`Compiling CUDA with Clang <https://llvm.org/docs/CompileCudaWithLLVM.html>`_.
.. code-block:: console
- $ clang-tidy source.cu -- --cuda-path=/path/to/cuda --cuda-host-only
+ $ clang-tidy source.cu -- --cuda-path=/path/to/cuda
-Using ``--cuda-host-only`` is recommended as it skips device-side compilation,
-speeding up the analysis and avoiding potential device-specific errors.
+By default, :program:`clang-tidy` will compile the code for the host. To
+analyze device-side code, use the ``--cuda-device-only`` flag:
+
+.. code-block:: console
+
+ $ clang-tidy source.cu -- --cuda-path=/path/to/cuda --cuda-device-only
Clang-Tidy Automation
=====================
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h
new file mode 100644
index 0000000000000..421fa4dd7dbae
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h
@@ -0,0 +1,253 @@
+/* Minimal declarations for CUDA support. Testing purposes only. */
+
+#include <stddef.h>
+
+#if __HIP__ || __CUDA__
+#define __constant__ __attribute__((constant))
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __host__ __attribute__((host))
+#define __shared__ __attribute__((shared))
+#if __HIP__
+#define __managed__ __attribute__((managed))
+#endif
+#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
+#define __grid_constant__ __attribute__((grid_constant))
+#define __cluster_dims__(...) __attribute__((cluster_dims(__VA_ARGS__)))
+#define __no_cluster__ __attribute__((no_cluster))
+#else
+#define __constant__
+#define __device__
+#define __global__
+#define __host__
+#define __shared__
+#define __managed__
+#define __launch_bounds__(...)
+#define __grid_constant__
+#define __cluster_dims__(...)
+#define __no_cluster__
+#endif
+
+struct dim3 {
+ unsigned x, y, z;
+ __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
+};
+
+#if __HIP__ || HIP_PLATFORM
+typedef struct hipStream *hipStream_t;
+typedef enum hipError {} hipError_t;
+int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
+ hipStream_t stream = 0);
+extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ hipStream_t stream = 0);
+#ifndef __HIP_API_PER_THREAD_DEFAULT_STREAM__
+extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem,
+ hipStream_t stream);
+#else
+extern "C" hipError_t hipLaunchKernel_spt(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem,
+ hipStream_t stream);
+#endif // __HIP_API_PER_THREAD_DEFAULT_STREAM__
+#elif __OFFLOAD_VIA_LLVM__
+extern "C" unsigned __llvmPushCallConfiguration(dim3 gridDim, dim3 blockDim,
+ size_t sharedMem = 0, void *stream = 0);
+extern "C" unsigned llvmLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
+ void **args, size_t sharedMem = 0, void *stream = 0);
+#else
+typedef struct cudaStream *cudaStream_t;
+typedef enum cudaError {} cudaError_t;
+extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0);
+extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0);
+extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem, cudaStream_t stream);
+extern "C" cudaError_t cudaLaunchKernel_ptsz(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem, cudaStream_t stream);
+extern "C" __device__ cudaError_t cudaLaunchDevice(void *func,
+ void *parameterBuffer,
+ dim3 gridDim, dim3 blockDim,
+ unsigned int sharedMem,
+ cudaStream_t stream);
+extern "C" __device__ void *cudaGetParameterBuffer(size_t alignment,
+ size_t size);
+#endif
+
+extern "C" __device__ int printf(const char*, ...);
+
+struct char1 {
+ char x;
+ __host__ __device__ char1(char x = 0) : x(x) {}
+};
+struct char2 {
+ char x, y;
+ __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {}
+};
+struct char4 {
+ char x, y, z, w;
+ __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct uchar1 {
+ unsigned char x;
+ __host__ __device__ uchar1(unsigned char x = 0) : x(x) {}
+};
+struct uchar2 {
+ unsigned char x, y;
+ __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {}
+};
+struct uchar4 {
+ unsigned char x, y, z, w;
+ __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct short1 {
+ short x;
+ __host__ __device__ short1(short x = 0) : x(x) {}
+};
+struct short2 {
+ short x, y;
+ __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {}
+};
+struct short4 {
+ short x, y, z, w;
+ __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct ushort1 {
+ unsigned short x;
+ __host__ __device__ ushort1(unsigned short x = 0) : x(x) {}
+};
+struct ushort2 {
+ unsigned short x, y;
+ __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {}
+};
+struct ushort4 {
+ unsigned short x, y, z, w;
+ __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct int1 {
+ int x;
+ __host__ __device__ int1(int x = 0) : x(x) {}
+};
+struct int2 {
+ int x, y;
+ __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {}
+};
+struct int4 {
+ int x, y, z, w;
+ __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct uint1 {
+ unsigned x;
+ __host__ __device__ uint1(unsigned x = 0) : x(x) {}
+};
+struct uint2 {
+ unsigned x, y;
+ __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {}
+};
+struct uint3 {
+ unsigned x, y, z;
+ __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {}
+};
+struct uint4 {
+ unsigned x, y, z, w;
+ __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct longlong1 {
+ long long x;
+ __host__ __device__ longlong1(long long x = 0) : x(x) {}
+};
+struct longlong2 {
+ long long x, y;
+ __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {}
+};
+struct longlong4 {
+ long long x, y, z, w;
+ __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct ulonglong1 {
+ unsigned long long x;
+ __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {}
+};
+struct ulonglong2 {
+ unsigned long long x, y;
+ __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {}
+};
+struct ulonglong4 {
+ unsigned long long x, y, z, w;
+ __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct float1 {
+ float x;
+ __host__ __device__ float1(float x = 0) : x(x) {}
+};
+struct float2 {
+ float x, y;
+ __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {}
+};
+struct float4 {
+ float x, y, z, w;
+ __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+struct double1 {
+ double x;
+ __host__ __device__ double1(double x = 0) : x(x) {}
+};
+struct double2 {
+ double x, y;
+ __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {}
+};
+struct double4 {
+ double x, y, z, w;
+ __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {}
+};
+
+typedef unsigned long long cudaTextureObject_t;
+typedef unsigned long long cudaSurfaceObject_t;
+
+enum cudaTextureReadMode {
+ cudaReadModeNormalizedFloat,
+ cudaReadModeElementType
+};
+
+enum cudaSurfaceBoundaryMode {
+ cudaBoundaryModeZero,
+ cudaBoundaryModeClamp,
+ cudaBoundaryModeTrap
+};
+
+enum {
+ cudaTextureType1D,
+ cudaTextureType2D,
+ cudaTextureType3D,
+ cudaTextureTypeCubemap,
+ cudaTextureType1DLayered,
+ cudaTextureType2DLayered,
+ cudaTextureTypeCubemapLayered
+};
+
+struct textureReference { };
+template <class T, int texType = cudaTextureType1D,
+ enum cudaTextureReadMode mode = cudaReadModeElementType>
+struct __attribute__((device_builtin_texture_type)) texture
+ : public textureReference {};
+
+struct surfaceReference { int desc; };
+
+template <typename T, int dim = 1>
+struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {};
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
index 3bc605d864461..37b3b5ab7ade6 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
@@ -1,6 +1,13 @@
-// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- -nocudainc -nocudalib --cuda-host-only | FileCheck %s
+// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- \
+// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA/usr/local/cuda/include \
+// RUN: --cuda-host-only | FileCheck %s
+// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- \
+// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
+// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA/usr/local/cuda/include \
+// RUN: --cuda-device-only | FileCheck %s
-#define __global__ __attribute__((global))
+#include <cuda_runtime.h>
// CHECK: :[[@LINE+1]]:38: warning: use nullptr [modernize-use-nullptr]
__global__ void kernel(int *p) { p = 0; }
>From 05c0eb5d389cad87e6c6ff680801e1bb1e1d85ad Mon Sep 17 00:00:00 2001
From: mtx <mitchell.xu2 at gmail.com>
Date: Sat, 27 Dec 2025 18:21:41 +0800
Subject: [PATCH 3/4] Cleanup
---
.../CUDA/{usr/local/cuda/include => }/cuda_runtime.h | 0
.../test/clang-tidy/infrastructure/basic-cuda.cu | 8 ++++----
2 files changed, 4 insertions(+), 4 deletions(-)
rename clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/{usr/local/cuda/include => }/cuda_runtime.h (100%)
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/cuda_runtime.h
similarity index 100%
rename from clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/usr/local/cuda/include/cuda_runtime.h
rename to clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/cuda_runtime.h
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
index 37b3b5ab7ade6..db0260b9bc5d8 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
@@ -1,10 +1,10 @@
// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- \
-// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
-// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA/usr/local/cuda/include \
+// RUN: --cuda-path=%S/Inputs/CUDA \
+// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA \
// RUN: --cuda-host-only | FileCheck %s
// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- \
-// RUN: --cuda-path=%S/Inputs/CUDA/usr/local/cuda \
-// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA/usr/local/cuda/include \
+// RUN: --cuda-path=%S/Inputs/CUDA \
+// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA \
// RUN: --cuda-device-only | FileCheck %s
#include <cuda_runtime.h>
>From b1d5098b114341da7487e453ff7a46841d835d9f Mon Sep 17 00:00:00 2001
From: mtx <mitchell.xu2 at gmail.com>
Date: Sun, 28 Dec 2025 18:12:35 +0800
Subject: [PATCH 4/4] Address review feedback
---
clang-tools-extra/docs/clang-tidy/index.rst | 16 +-
.../test/clang-tidy/check_clang_tidy.py | 2 +-
.../infrastructure/Inputs/CUDA/cuda.h | 64 +++++
.../infrastructure/Inputs/CUDA/cuda_runtime.h | 253 ------------------
.../clang-tidy/infrastructure/basic-cuda.cu | 16 --
.../clang-tidy/infrastructure/cuda-basic.cu | 13 +
6 files changed, 91 insertions(+), 273 deletions(-)
create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/cuda.h
delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/cuda_runtime.h
delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/cuda-basic.cu
diff --git a/clang-tools-extra/docs/clang-tidy/index.rst b/clang-tools-extra/docs/clang-tidy/index.rst
index 4a0bab8693878..6bd38db966396 100644
--- a/clang-tools-extra/docs/clang-tidy/index.rst
+++ b/clang-tools-extra/docs/clang-tidy/index.rst
@@ -357,12 +357,22 @@ header resolution, it is important to specify the CUDA toolkit path using
``--cuda-path``. For more details on how Clang handles CUDA, see
`Compiling CUDA with Clang <https://llvm.org/docs/CompileCudaWithLLVM.html>`_.
+If you are using a GCC + NVCC build setup, the compilation database will
+contain NVCC-specific flags that :program:`clang-tidy` does not understand.
+
+In this case, you should use the ``RemovedArgs`` configuration option (or
+the ``--removed-arg`` command-line option) to remove these flags, and
+``ExtraArgs`` (or ``--extra-arg``) to pass ``--cuda-path``.
+
+For example, to remove the NVCC-specific ``-xcu`` flag:
+
.. code-block:: console
- $ clang-tidy source.cu -- --cuda-path=/path/to/cuda
+ $ clang-tidy source.cu --removed-arg="-xcu" --extra-arg="--cuda-path=/path/to/cuda"
-By default, :program:`clang-tidy` will compile the code for the host. To
-analyze device-side code, use the ``--cuda-device-only`` flag:
+By default, :program:`clang-tidy` performs the host compilation, which is
+sufficient to analyze both host and device code. To perform the device
+compilation instead, use the ``--cuda-device-only`` flag:
.. code-block:: console
diff --git a/clang-tools-extra/test/clang-tidy/check_clang_tidy.py b/clang-tools-extra/test/clang-tidy/check_clang_tidy.py
index b173ecf4fbdca..4e42f0b12516b 100755
--- a/clang-tools-extra/test/clang-tidy/check_clang_tidy.py
+++ b/clang-tools-extra/test/clang-tidy/check_clang_tidy.py
@@ -110,7 +110,7 @@ def __init__(self, args: argparse.Namespace, extra_args: List[str]) -> None:
file_name_with_extension = self.assume_file_name or self.input_file_name
_, extension = os.path.splitext(file_name_with_extension)
- if extension not in [".c", ".hpp", ".m", ".mm"]:
+ if extension not in [".c", ".hpp", ".m", ".mm", ".cu"]:
extension = ".cpp"
self.temp_file_name = self.temp_file_name + extension
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/cuda.h b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/cuda.h
new file mode 100644
index 0000000000000..562b75025c798
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/cuda.h
@@ -0,0 +1,64 @@
+/* Minimal declarations for CUDA support. Testing purposes only. */
+
+#include <stddef.h>
+
+// Make this file work with nvcc, for testing compatibility.
+
+#ifndef __NVCC__
+#define __constant__ __attribute__((constant))
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __host__ __attribute__((host))
+#define __shared__ __attribute__((shared))
+#define __managed__ __attribute__((managed))
+#define __grid_constant__ __attribute__((grid_constant))
+#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
+#define __cluster_dims__(...) __attribute__((cluster_dims(__VA_ARGS__)))
+#define __no_cluster__ __attribute__((no_cluster))
+
+struct dim3 {
+ unsigned x, y, z;
+ __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
+};
+
+#ifdef __HIP__
+typedef struct hipStream *hipStream_t;
+typedef enum hipError {} hipError_t;
+int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
+ hipStream_t stream = 0); // NOLINT
+extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ hipStream_t stream = 0); // NOLINT
+extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem,
+ hipStream_t stream);
+#else
+typedef struct cudaStream *cudaStream_t;
+typedef enum cudaError {} cudaError_t;
+
+extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0); // NOLINT
+extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0); // NOLINT
+extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem, cudaStream_t stream);
+extern "C" __device__ cudaError_t cudaLaunchDevice(void *func,
+ void *parameterBuffer,
+ dim3 gridDim, dim3 blockDim,
+ unsigned int sharedMem,
+ cudaStream_t stream);
+extern "C" __device__ void *cudaGetParameterBuffer(size_t alignment,
+ size_t size);
+#endif
+
+// Host- and device-side placement new overloads.
+void *operator new(__SIZE_TYPE__, void *p) { return p; }
+void *operator new[](__SIZE_TYPE__, void *p) { return p; }
+__device__ void *operator new(__SIZE_TYPE__, void *p) { return p; }
+__device__ void *operator new[](__SIZE_TYPE__, void *p) { return p; }
+
+#endif // !__NVCC__
\ No newline at end of file
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/cuda_runtime.h b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/cuda_runtime.h
deleted file mode 100644
index 421fa4dd7dbae..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/CUDA/cuda_runtime.h
+++ /dev/null
@@ -1,253 +0,0 @@
-/* Minimal declarations for CUDA support. Testing purposes only. */
-
-#include <stddef.h>
-
-#if __HIP__ || __CUDA__
-#define __constant__ __attribute__((constant))
-#define __device__ __attribute__((device))
-#define __global__ __attribute__((global))
-#define __host__ __attribute__((host))
-#define __shared__ __attribute__((shared))
-#if __HIP__
-#define __managed__ __attribute__((managed))
-#endif
-#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
-#define __grid_constant__ __attribute__((grid_constant))
-#define __cluster_dims__(...) __attribute__((cluster_dims(__VA_ARGS__)))
-#define __no_cluster__ __attribute__((no_cluster))
-#else
-#define __constant__
-#define __device__
-#define __global__
-#define __host__
-#define __shared__
-#define __managed__
-#define __launch_bounds__(...)
-#define __grid_constant__
-#define __cluster_dims__(...)
-#define __no_cluster__
-#endif
-
-struct dim3 {
- unsigned x, y, z;
- __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
-};
-
-#if __HIP__ || HIP_PLATFORM
-typedef struct hipStream *hipStream_t;
-typedef enum hipError {} hipError_t;
-int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
- hipStream_t stream = 0);
-extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
- size_t sharedSize = 0,
- hipStream_t stream = 0);
-#ifndef __HIP_API_PER_THREAD_DEFAULT_STREAM__
-extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
- dim3 blockDim, void **args,
- size_t sharedMem,
- hipStream_t stream);
-#else
-extern "C" hipError_t hipLaunchKernel_spt(const void *func, dim3 gridDim,
- dim3 blockDim, void **args,
- size_t sharedMem,
- hipStream_t stream);
-#endif // __HIP_API_PER_THREAD_DEFAULT_STREAM__
-#elif __OFFLOAD_VIA_LLVM__
-extern "C" unsigned __llvmPushCallConfiguration(dim3 gridDim, dim3 blockDim,
- size_t sharedMem = 0, void *stream = 0);
-extern "C" unsigned llvmLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
- void **args, size_t sharedMem = 0, void *stream = 0);
-#else
-typedef struct cudaStream *cudaStream_t;
-typedef enum cudaError {} cudaError_t;
-extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
- size_t sharedSize = 0,
- cudaStream_t stream = 0);
-extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
- size_t sharedSize = 0,
- cudaStream_t stream = 0);
-extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
- dim3 blockDim, void **args,
- size_t sharedMem, cudaStream_t stream);
-extern "C" cudaError_t cudaLaunchKernel_ptsz(const void *func, dim3 gridDim,
- dim3 blockDim, void **args,
- size_t sharedMem, cudaStream_t stream);
-extern "C" __device__ cudaError_t cudaLaunchDevice(void *func,
- void *parameterBuffer,
- dim3 gridDim, dim3 blockDim,
- unsigned int sharedMem,
- cudaStream_t stream);
-extern "C" __device__ void *cudaGetParameterBuffer(size_t alignment,
- size_t size);
-#endif
-
-extern "C" __device__ int printf(const char*, ...);
-
-struct char1 {
- char x;
- __host__ __device__ char1(char x = 0) : x(x) {}
-};
-struct char2 {
- char x, y;
- __host__ __device__ char2(char x = 0, char y = 0) : x(x), y(y) {}
-};
-struct char4 {
- char x, y, z, w;
- __host__ __device__ char4(char x = 0, char y = 0, char z = 0, char w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct uchar1 {
- unsigned char x;
- __host__ __device__ uchar1(unsigned char x = 0) : x(x) {}
-};
-struct uchar2 {
- unsigned char x, y;
- __host__ __device__ uchar2(unsigned char x = 0, unsigned char y = 0) : x(x), y(y) {}
-};
-struct uchar4 {
- unsigned char x, y, z, w;
- __host__ __device__ uchar4(unsigned char x = 0, unsigned char y = 0, unsigned char z = 0, unsigned char w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct short1 {
- short x;
- __host__ __device__ short1(short x = 0) : x(x) {}
-};
-struct short2 {
- short x, y;
- __host__ __device__ short2(short x = 0, short y = 0) : x(x), y(y) {}
-};
-struct short4 {
- short x, y, z, w;
- __host__ __device__ short4(short x = 0, short y = 0, short z = 0, short w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct ushort1 {
- unsigned short x;
- __host__ __device__ ushort1(unsigned short x = 0) : x(x) {}
-};
-struct ushort2 {
- unsigned short x, y;
- __host__ __device__ ushort2(unsigned short x = 0, unsigned short y = 0) : x(x), y(y) {}
-};
-struct ushort4 {
- unsigned short x, y, z, w;
- __host__ __device__ ushort4(unsigned short x = 0, unsigned short y = 0, unsigned short z = 0, unsigned short w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct int1 {
- int x;
- __host__ __device__ int1(int x = 0) : x(x) {}
-};
-struct int2 {
- int x, y;
- __host__ __device__ int2(int x = 0, int y = 0) : x(x), y(y) {}
-};
-struct int4 {
- int x, y, z, w;
- __host__ __device__ int4(int x = 0, int y = 0, int z = 0, int w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct uint1 {
- unsigned x;
- __host__ __device__ uint1(unsigned x = 0) : x(x) {}
-};
-struct uint2 {
- unsigned x, y;
- __host__ __device__ uint2(unsigned x = 0, unsigned y = 0) : x(x), y(y) {}
-};
-struct uint3 {
- unsigned x, y, z;
- __host__ __device__ uint3(unsigned x = 0, unsigned y = 0, unsigned z = 0) : x(x), y(y), z(z) {}
-};
-struct uint4 {
- unsigned x, y, z, w;
- __host__ __device__ uint4(unsigned x = 0, unsigned y = 0, unsigned z = 0, unsigned w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct longlong1 {
- long long x;
- __host__ __device__ longlong1(long long x = 0) : x(x) {}
-};
-struct longlong2 {
- long long x, y;
- __host__ __device__ longlong2(long long x = 0, long long y = 0) : x(x), y(y) {}
-};
-struct longlong4 {
- long long x, y, z, w;
- __host__ __device__ longlong4(long long x = 0, long long y = 0, long long z = 0, long long w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct ulonglong1 {
- unsigned long long x;
- __host__ __device__ ulonglong1(unsigned long long x = 0) : x(x) {}
-};
-struct ulonglong2 {
- unsigned long long x, y;
- __host__ __device__ ulonglong2(unsigned long long x = 0, unsigned long long y = 0) : x(x), y(y) {}
-};
-struct ulonglong4 {
- unsigned long long x, y, z, w;
- __host__ __device__ ulonglong4(unsigned long long x = 0, unsigned long long y = 0, unsigned long long z = 0, unsigned long long w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct float1 {
- float x;
- __host__ __device__ float1(float x = 0) : x(x) {}
-};
-struct float2 {
- float x, y;
- __host__ __device__ float2(float x = 0, float y = 0) : x(x), y(y) {}
-};
-struct float4 {
- float x, y, z, w;
- __host__ __device__ float4(float x = 0, float y = 0, float z = 0, float w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-struct double1 {
- double x;
- __host__ __device__ double1(double x = 0) : x(x) {}
-};
-struct double2 {
- double x, y;
- __host__ __device__ double2(double x = 0, double y = 0) : x(x), y(y) {}
-};
-struct double4 {
- double x, y, z, w;
- __host__ __device__ double4(double x = 0, double y = 0, double z = 0, double w = 0) : x(x), y(y), z(z), w(w) {}
-};
-
-typedef unsigned long long cudaTextureObject_t;
-typedef unsigned long long cudaSurfaceObject_t;
-
-enum cudaTextureReadMode {
- cudaReadModeNormalizedFloat,
- cudaReadModeElementType
-};
-
-enum cudaSurfaceBoundaryMode {
- cudaBoundaryModeZero,
- cudaBoundaryModeClamp,
- cudaBoundaryModeTrap
-};
-
-enum {
- cudaTextureType1D,
- cudaTextureType2D,
- cudaTextureType3D,
- cudaTextureTypeCubemap,
- cudaTextureType1DLayered,
- cudaTextureType2DLayered,
- cudaTextureTypeCubemapLayered
-};
-
-struct textureReference { };
-template <class T, int texType = cudaTextureType1D,
- enum cudaTextureReadMode mode = cudaReadModeElementType>
-struct __attribute__((device_builtin_texture_type)) texture
- : public textureReference {};
-
-struct surfaceReference { int desc; };
-
-template <typename T, int dim = 1>
-struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference {};
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu b/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
deleted file mode 100644
index db0260b9bc5d8..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/basic-cuda.cu
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- \
-// RUN: --cuda-path=%S/Inputs/CUDA \
-// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA \
-// RUN: --cuda-host-only | FileCheck %s
-// RUN: clang-tidy %s -checks='-*,modernize-use-nullptr' -- \
-// RUN: --cuda-path=%S/Inputs/CUDA \
-// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA \
-// RUN: --cuda-device-only | FileCheck %s
-
-#include <cuda_runtime.h>
-
-// CHECK: :[[@LINE+1]]:38: warning: use nullptr [modernize-use-nullptr]
-__global__ void kernel(int *p) { p = 0; }
-
-// CHECK: :[[@LINE+1]]:11: warning: use nullptr [modernize-use-nullptr]
-void *p = 0;
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/cuda-basic.cu b/clang-tools-extra/test/clang-tidy/infrastructure/cuda-basic.cu
new file mode 100644
index 0000000000000..fda58da96db83
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/cuda-basic.cu
@@ -0,0 +1,13 @@
+// RUN: %check_clang_tidy %s modernize-use-nullptr %t -- -- \
+// RUN: --cuda-path=%S/Inputs/CUDA \
+// RUN: -nocudalib -nocudainc -I %S/Inputs/CUDA
+
+#include <cuda.h>
+
+__global__ void kernel(int *p) { p = 0; }
+// CHECK-MESSAGES: :[[@LINE-1]]:38: warning: use nullptr [modernize-use-nullptr]
+// CHECK-FIXES: __global__ void kernel(int *p) { p = nullptr; }
+
+void *p = 0;
+// CHECK-MESSAGES: :[[@LINE-1]]:11: warning: use nullptr [modernize-use-nullptr]
+// CHECK-FIXES: void *p = nullptr;
More information about the cfe-commits
mailing list