[clang] [CUDA] make kernel stub ICF-proof (PR #90155)
Reid Kleckner via cfe-commits
cfe-commits at lists.llvm.org
Mon Apr 29 16:12:11 PDT 2024
================
@@ -424,6 +424,34 @@ void CGNVCUDARuntime::emitDeviceStubBodyNew(CodeGenFunction &CGF,
CGM.CreateRuntimeFunction(FTy, LaunchKernelName);
CGF.EmitCall(FI, CGCallee::forDirect(cudaLaunchKernelFn), ReturnValueSlot(),
LaunchKernelArgs);
+
+ // To prevent CUDA device stub functions from being merged by ICF in an MSVC
+ // environment, create a unique global variable for each kernel and write to
+ // the variable in the device stub.
+ if (CGM.getContext().getTargetInfo().getCXXABI().isMicrosoft() &&
+ !CGF.getLangOpts().HIP) {
+ llvm::Function *KernelFunction = llvm::cast<llvm::Function>(Kernel);
+ if (KernelFunction->hasComdat()) {
+ std::string KernelName = KernelFunction->getName().str();
+ std::string GlobalVarName = KernelName + ".id";
+
+ llvm::GlobalVariable *HandleVar =
+ CGM.getModule().getNamedGlobal(GlobalVarName);
+ if (!HandleVar) {
+ HandleVar = new llvm::GlobalVariable(
+ CGM.getModule(), CGM.Int8Ty,
+ /*Constant=*/false, KernelFunction->getLinkage(),
+ llvm::ConstantInt::get(CGM.Int8Ty, 0), GlobalVarName);
+ HandleVar->setDSOLocal(KernelFunction->isDSOLocal());
+ HandleVar->setVisibility(KernelFunction->getVisibility());
+ HandleVar->setComdat(CGM.getModule().getOrInsertComdat(GlobalVarName));
+ }
+
+ CGF.Builder.CreateAlignedStore(llvm::ConstantInt::get(CGM.Int8Ty, 1),
----------------
rnk wrote:
LLVM knows how to optimize away a single write to an otherwise unused global, so I would mark this store volatile.
https://github.com/llvm/llvm-project/pull/90155
More information about the cfe-commits mailing list