[clang] [HIP] Fix comdat of template kernel handle (PR #66283)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Sep 13 13:07:02 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
<details>
<summary>Changes</summary>
Currently, clang emits LLVM IR that fails verifier for the following code:
```
template<typename T>
__global__ void foo(T x);
void bar() {
foo<<<1, 1>>>(0);
}
```
This is due to clang putting the kernel handle for foo into comdat, which is not allowed, since the kernel handle is a declaration.
The siutation is similar to calling a declaration-only template function. The callee will be a declaration in LLVM IR and won't be put into comdat. This is in contrast to calling a template function with body, which will be put into comdat.
Fixes: SWDEV-419769
--
Full diff: https://github.com/llvm/llvm-project/pull/66283.diff
2 Files Affected:
- (modified) clang/lib/CodeGen/CGCUDANV.cpp (+4-1)
- (modified) clang/test/CodeGenCUDA/kernel-stub-name.cu (+12-1)
<pre>
diff --git a/clang/lib/CodeGen/CGCUDANV.cpp b/clang/lib/CodeGen/CGCUDANV.cpp
index 08769c98dc298a0..0efe7e8db0183fe 100644
--- a/clang/lib/CodeGen/CGCUDANV.cpp
+++ b/clang/lib/CodeGen/CGCUDANV.cpp
@@ -1234,7 +1234,10 @@ llvm::GlobalValue *CGNVCUDARuntime::getKernelHandle(llvm::Function *F,
Var->setAlignment(CGM.getPointerAlign().getAsAlign());
Var->setDSOLocal(F->isDSOLocal());
Var->setVisibility(F->getVisibility());
- CGM.maybeSetTrivialComdat(*GD.getDecl(), *Var);
+ auto *FD = cast<FunctionDecl>(GD.getDecl());
+ auto *FT = FD->getPrimaryTemplate();
+ if (!FT || FT->isThisDeclarationADefinition())
+ CGM.maybeSetTrivialComdat(*FD, *Var);
KernelHandles[F->getName()] = Var;
KernelStubs[Var] = F;
return Var;
diff --git a/clang/test/CodeGenCUDA/kernel-stub-name.cu b/clang/test/CodeGenCUDA/kernel-stub-name.cu
index 9884046fcd0fd0c..008d66bd590b759 100644
--- a/clang/test/CodeGenCUDA/kernel-stub-name.cu
+++ b/clang/test/CodeGenCUDA/kernel-stub-name.cu
@@ -26,12 +26,13 @@
// GNU: @[[HNSKERN:_ZN2ns8nskernelEv]] = constant ptr @[[NSSTUB:_ZN2ns23__device_stub__nskernelEv]], align 8
// GNU: @[[HTKERN:_Z10kernelfuncIiEvv]] = linkonce_odr constant ptr @[[TSTUB:_Z25__device_stub__kernelfuncIiEvv]], comdat, align 8
// GNU: @[[HDKERN:_Z11kernel_declv]] = external constant ptr, align 8
+// GNU: @[[HTDKERN:_Z16temp_kernel_declIiEvT_]] = external constant ptr, align 8
// MSVC: @[[HCKERN:ckernel]] = dso_local constant ptr @[[CSTUB:__device_stub__ckernel]], align 8
// MSVC: @[[HNSKERN:"\?nskernel at ns@@YAXXZ.*"]] = dso_local constant ptr @[[NSSTUB:"\?__device_stub__nskernel at ns@@YAXXZ"]], align 8
// MSVC: @[[HTKERN:"\?\?\$kernelfunc at H@@YAXXZ.*"]] = linkonce_odr dso_local constant ptr @[[TSTUB:"\?\?\$__device_stub__kernelfunc at H@@YAXXZ.*"]], comdat, align 8
// MSVC: @[[HDKERN:"\?kernel_decl@@YAXXZ.*"]] = external dso_local constant ptr, align 8
-
+// MSVC: @[[HTDKERN:"\?\?\$temp_kernel_decl at H@@YAXH.*"]] = external dso_local constant ptr, align 8
extern "C" __global__ void ckernel() {}
namespace ns {
@@ -43,6 +44,9 @@ __global__ void kernelfunc() {}
__global__ void kernel_decl();
+template<class T>
+__global__ void temp_kernel_decl(T x);
+
extern "C" void (*kernel_ptr)();
extern "C" void *void_ptr;
@@ -69,13 +73,16 @@ extern "C" void launch(void *kern);
// CHECK: call void @[[NSSTUB]]()
// CHECK: call void @[[TSTUB]]()
// GNU: call void @[[DSTUB:_Z26__device_stub__kernel_declv]]()
+// GNU: call void @[[TDSTUB:_Z31__device_stub__temp_kernel_declIiEvT_]](
// MSVC: call void @[[DSTUB:"\?__device_stub__kernel_decl@@YAXXZ"]]()
+// MSVC: call void @[[TDSTUB:"\?\?\$__device_stub__temp_kernel_decl at H@@YAXH at Z"]](
extern "C" void fun1(void) {
ckernel<<<1, 1>>>();
ns::nskernel<<<1, 1>>>();
kernelfunc<int><<<1, 1>>>();
kernel_decl<<<1, 1>>>();
+ temp_kernel_decl<<<1, 1>>>(1);
}
// Template kernel stub functions
@@ -86,6 +93,7 @@ extern "C" void fun1(void) {
// Check declaration of stub function for external kernel.
// CHECK: declare{{.*}}@[[DSTUB]]
+// CHECK: declare{{.*}}@[[TDSTUB]]
// Check kernel handle is used for passing the kernel as a function pointer.
@@ -94,11 +102,13 @@ extern "C" void fun1(void) {
// CHECK: call void @launch({{.*}}[[HNSKERN]]
// CHECK: call void @launch({{.*}}[[HTKERN]]
// CHECK: call void @launch({{.*}}[[HDKERN]]
+// CHECK: call void @launch({{.*}}[[HTDKERN]]
extern "C" void fun2() {
launch((void *)ckernel);
launch((void *)ns::nskernel);
launch((void *)kernelfunc<int>);
launch((void *)kernel_decl);
+ launch((void *)temp_kernel_decl<int>);
}
// Check kernel handle is used for assigning a kernel to a function pointer.
@@ -148,3 +158,4 @@ extern "C" void fun5() {
// CHECK: call{{.*}}@__hipRegisterFunction{{.*}}@[[HTKERN]]{{.*}}@[[TKERN]]
// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}__device_stub
// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}kernel_decl
+// NEG-NOT: call{{.*}}@__hipRegisterFunction{{.*}}temp_kernel_decl
</pre>
</details>
https://github.com/llvm/llvm-project/pull/66283
More information about the cfe-commits
mailing list