[clang] [CIR][CUDA] Upstream device stub mangling (PR #177790)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Jan 24 11:24:26 PST 2026
llvmbot wrote:
@llvm/pr-subscribers-clang
Author: David Rivera (RiverDave)
Changes:
Note that this is the start of a series of incremental patches; the stub body is therefore left empty for now, since filling it in requires a portion of the actual CUDA runtime integration that is deferred to a later PR.
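
For context, the scheme mirrors classic CodeGen: when the host side references a `__global__` kernel as a stub (`KernelReferenceKind::Stub`), the `__device_stub__` prefix is spliced into the symbol, directly onto the identifier for `extern "C"` kernels and inside the Itanium mangling for C++ ones. A minimal sketch of the naming scheme (hypothetical helper, not the actual CIRGen code):

```cpp
// Sketch only: hypothetical helper illustrating the naming scheme;
// the real logic lives in getMangledNameImpl in CIRGenModule.cpp.
#include <string>

std::string stubIdentifier(const std::string &kernelIdent) {
  // extern "C" kernels: this prefixed identifier is the symbol itself.
  // C++ kernels: conceptually, this is the identifier the Itanium
  // mangler sees, so the prefix lands inside the mangled name.
  return "__device_stub__" + kernelIdent;
}
// stubIdentifier("ckernel")  -> "__device_stub__ckernel"
// stubIdentifier("nskernel") -> mangled as "_ZN2ns23__device_stub__nskernelEv"
```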
---
Full diff: https://github.com/llvm/llvm-project/pull/177790.diff
4 Files Affected:
- (modified) clang/lib/CIR/CodeGen/CIRGenFunction.cpp (+3-1)
- (modified) clang/lib/CIR/CodeGen/CIRGenModule.cpp (+8-2)
- (added) clang/test/CIR/CodeGen/CUDA/kernel-stub-name.cu (+22)
- (added) clang/test/CIR/CodeGen/inputs/cuda.h (+74)
``````````diff
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index f2d73720a9c2b..4c212b06019ea 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -748,7 +748,9 @@ cir::FuncOp CIRGenFunction::generateCode(clang::GlobalDecl gd, cir::FuncOp fn,
emitConstructorBody(args);
} else if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice &&
funcDecl->hasAttr<CUDAGlobalAttr>()) {
- getCIRGenModule().errorNYI(bodyRange, "CUDA kernel");
+ // TODO(cir): Emit device stub body with kernel launch runtime calls
+ // (emitDeviceStub). For now, emit an empty stub.
+ assert(!cir::MissingFeatures::cudaSupport());
} else if (isa<CXXMethodDecl>(funcDecl) &&
cast<CXXMethodDecl>(funcDecl)->isLambdaStaticInvoker()) {
// The lambda static invoker function is special, because it forwards or
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index 61d84f197e6ec..b535eab913a5d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -1772,9 +1772,15 @@ static std::string getMangledNameImpl(CIRGenModule &cgm, GlobalDecl gd,
cgm.errorNYI(nd->getSourceRange(), "getMangledName: X86RegCall");
} else if (fd && fd->hasAttr<CUDAGlobalAttr>() &&
gd.getKernelReferenceKind() == KernelReferenceKind::Stub) {
- cgm.errorNYI(nd->getSourceRange(), "getMangledName: CUDA device stub");
+ out << "__device_stub__" << ii->getName();
+ } else if (fd &&
+ DeviceKernelAttr::isOpenCLSpelling(
+ fd->getAttr<DeviceKernelAttr>()) &&
+ gd.getKernelReferenceKind() == KernelReferenceKind::Stub) {
+ cgm.errorNYI(nd->getSourceRange(), "getMangledName: OpenCL Stub");
+ } else {
+ out << ii->getName();
}
- out << ii->getName();
}
// Check if the module name hash should be appended for internal linkage
diff --git a/clang/test/CIR/CodeGen/CUDA/kernel-stub-name.cu b/clang/test/CIR/CodeGen/CUDA/kernel-stub-name.cu
new file mode 100644
index 0000000000000..6d5efb69827e3
--- /dev/null
+++ b/clang/test/CIR/CodeGen/CUDA/kernel-stub-name.cu
@@ -0,0 +1,22 @@
+// Based on clang/test/CodeGenCUDA/kernel-stub-name.cu.
+
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-cir %s \
+// RUN: -x cuda -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+#include "../inputs/cuda.h"
+
+// CHECK: cir.func {{.*}} @__device_stub__ckernel()
+// CHECK-NEXT: cir.return
+// CHECK-NEXT: }
+extern "C" __global__ void ckernel() {}
+
+// CHECK: cir.func {{.*}} @_ZN2ns23__device_stub__nskernelEv()
+namespace ns {
+__global__ void nskernel() {}
+} // namespace ns
+
+// CHECK: cir.func {{.*}} @_Z25__device_stub__kernelfuncIiEvv()
+template <class T>
+__global__ void kernelfunc() {}
+template __global__ void kernelfunc<int>();
diff --git a/clang/test/CIR/CodeGen/inputs/cuda.h b/clang/test/CIR/CodeGen/inputs/cuda.h
new file mode 100644
index 0000000000000..204bf2972088d
--- /dev/null
+++ b/clang/test/CIR/CodeGen/inputs/cuda.h
@@ -0,0 +1,74 @@
+/* Minimal declarations for CUDA support. Testing purposes only. */
+/* From test/CodeGenCUDA/Inputs/cuda.h. */
+#include <stddef.h>
+
+#if __HIP__ || __CUDA__
+#define __constant__ __attribute__((constant))
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __host__ __attribute__((host))
+#define __shared__ __attribute__((shared))
+#if __HIP__
+#define __managed__ __attribute__((managed))
+#endif
+#define __launch_bounds__(...) __attribute__((launch_bounds(__VA_ARGS__)))
+#define __grid_constant__ __attribute__((grid_constant))
+#else
+#define __constant__
+#define __device__
+#define __global__
+#define __host__
+#define __shared__
+#define __managed__
+#define __launch_bounds__(...)
+#define __grid_constant__
+#endif
+
+struct dim3 {
+ unsigned x, y, z;
+ __host__ __device__ dim3(unsigned x, unsigned y = 1, unsigned z = 1) : x(x), y(y), z(z) {}
+};
+
+#if __HIP__ || HIP_PLATFORM
+typedef struct hipStream *hipStream_t;
+typedef enum hipError {} hipError_t;
+int hipConfigureCall(dim3 gridSize, dim3 blockSize, size_t sharedSize = 0,
+ hipStream_t stream = 0);
+extern "C" hipError_t __hipPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ hipStream_t stream = 0);
+#ifndef __HIP_API_PER_THREAD_DEFAULT_STREAM__
+extern "C" hipError_t hipLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem,
+ hipStream_t stream);
+#else
+extern "C" hipError_t hipLaunchKernel_spt(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem,
+ hipStream_t stream);
+#endif // __HIP_API_PER_THREAD_DEFAULT_STREAM__
+#elif __OFFLOAD_VIA_LLVM__
+extern "C" unsigned __llvmPushCallConfiguration(dim3 gridDim, dim3 blockDim,
+ size_t sharedMem = 0, void *stream = 0);
+extern "C" unsigned llvmLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
+ void **args, size_t sharedMem = 0, void *stream = 0);
+#else
+typedef struct cudaStream *cudaStream_t;
+typedef enum cudaError {} cudaError_t;
+extern "C" int cudaConfigureCall(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0);
+extern "C" int __cudaPushCallConfiguration(dim3 gridSize, dim3 blockSize,
+ size_t sharedSize = 0,
+ cudaStream_t stream = 0);
+extern "C" cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem, cudaStream_t stream);
+extern "C" cudaError_t cudaLaunchKernel_ptsz(const void *func, dim3 gridDim,
+ dim3 blockDim, void **args,
+ size_t sharedMem, cudaStream_t stream);
+
+#endif
+
+extern "C" __device__ int printf(const char*, ...);
``````````
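
For readers decoding the CHECK lines in the new test: the length prefixes in the Itanium mangling count the prefixed identifier, e.g. `23` = strlen("__device_stub__nskernel"). A small host-only sketch (not part of the patch) that verifies the expected symbols with the Itanium ABI demangler:

```cpp
// Host-only sketch (not part of the patch): demangle the expected
// stub symbols with abi::__cxa_demangle to check the test's CHECK
// lines by hand.
#include <cxxabi.h>
#include <cstdio>
#include <cstdlib>

static void show(const char *sym) {
  int status = 0;
  char *dem = abi::__cxa_demangle(sym, nullptr, nullptr, &status);
  std::printf("%s -> %s\n", sym, status == 0 ? dem : "(not a mangled name)");
  std::free(dem);
}

int main() {
  show("_ZN2ns23__device_stub__nskernelEv");  // ns::__device_stub__nskernel()
  show("_Z25__device_stub__kernelfuncIiEvv"); // void __device_stub__kernelfunc<int>()
}
```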
https://github.com/llvm/llvm-project/pull/177790