[clang] b46f980 - [HIP] fix host-used external kernel (#83870)

via cfe-commits cfe-commits at lists.llvm.org
Fri Mar 8 07:50:42 PST 2024


Author: Yaxun (Sam) Liu
Date: 2024-03-08T10:50:38-05:00
New Revision: b46f980454d5ceafc8dab37dbdb1011e333ae6de

URL: https://github.com/llvm/llvm-project/commit/b46f980454d5ceafc8dab37dbdb1011e333ae6de
DIFF: https://github.com/llvm/llvm-project/commit/b46f980454d5ceafc8dab37dbdb1011e333ae6de.diff

LOG: [HIP] fix host-used external kernel (#83870)

In -fgpu-rdc mode, when an external kernel is used by a host function
with weak_odr linkage (e.g. explicitly instantiated template function),
the kernel should not be marked as host-used external kernel, since the
host function may be dropped by the linker. Mark the external kernel as
host-used external kernel will force a reference to the external kernel,
which the user may not define in other TU.

Fixes: https://github.com/llvm/llvm-project/issues/83771

Added: 
    

Modified: 
    clang/lib/Sema/SemaCUDA.cpp
    clang/lib/Sema/SemaExpr.cpp
    clang/test/CodeGenCUDA/host-used-extern.cu

Removed: 
    


################################################################################
diff  --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index 6a66ecf6f94c17..4d4f4b6a2d4d95 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -895,7 +895,10 @@ bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
   if (DiagKind == SemaDiagnosticBuilder::K_Nop) {
     // For -fgpu-rdc, keep track of external kernels used by host functions.
     if (LangOpts.CUDAIsDevice && LangOpts.GPURelocatableDeviceCode &&
-        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined())
+        Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() &&
+        (!Caller || (!Caller->getDescribedFunctionTemplate() &&
+                     getASTContext().GetGVALinkageForFunction(Caller) ==
+                         GVA_StrongExternal)))
       getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee);
     return true;
   }

diff  --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 47bb263f56aade..93f82e68ab6440 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19218,7 +19218,10 @@ MarkVarDeclODRUsed(ValueDecl *V, SourceLocation Loc, Sema &SemaRef,
       // externalize the static device side variable ODR-used by host code.
       if (!Var->hasExternalStorage())
         SemaRef.getASTContext().CUDADeviceVarODRUsedByHost.insert(Var);
-      else if (SemaRef.LangOpts.GPURelocatableDeviceCode)
+      else if (SemaRef.LangOpts.GPURelocatableDeviceCode &&
+               (!FD || (!FD->getDescribedFunctionTemplate() &&
+                        SemaRef.getASTContext().GetGVALinkageForFunction(FD) ==
+                            GVA_StrongExternal)))
         SemaRef.getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Var);
     }
   }

diff  --git a/clang/test/CodeGenCUDA/host-used-extern.cu b/clang/test/CodeGenCUDA/host-used-extern.cu
index e8f8e12aad47d1..1ae644ae981aaf 100644
--- a/clang/test/CodeGenCUDA/host-used-extern.cu
+++ b/clang/test/CodeGenCUDA/host-used-extern.cu
@@ -24,6 +24,7 @@
 
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel2v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel3v
+// NEG-NOT: @__clang_gpu_used_external = {{.*}} @_Z7kernel5v
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var2
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @var3
 // NEG-NOT: @__clang_gpu_used_external = {{.*}} @ext_shvar
@@ -44,6 +45,10 @@ __global__ void kernel3();
 // kernel4 is marked as used even though it is not called.
 __global__ void kernel4();
 
+// kernel5 is not marked as used since it is called by host function
+// with weak_odr linkage, which may be dropped by linker.
+__global__ void kernel5();
+
 extern __device__ int var1;
 
 __device__ int var2;
@@ -67,3 +72,11 @@ __global__ void test_lambda_using_extern_shared() {
   };
   lambda();
 }
+
+template<class T>
+void template_caller() {
+  kernel5<<<1, 1>>>();
+  var1 = 1;
+}
+
+template void template_caller<int>();


        


More information about the cfe-commits mailing list