[llvm] 421085f - [Offload] Change unregister library to use `atexit` instead of destructor (#86830)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 27 12:09:41 PDT 2024
Author: Joseph Huber
Date: 2024-03-27T14:09:37-05:00
New Revision: 421085fd740d937559db068c0a1b354b033aca63
URL: https://github.com/llvm/llvm-project/commit/421085fd740d937559db068c0a1b354b033aca63
DIFF: https://github.com/llvm/llvm-project/commit/421085fd740d937559db068c0a1b354b033aca63.diff
LOG: [Offload] Change unregister library to use `atexit` instead of destructor (#86830)
Summary:
The 'new driver' sets up the lifetime of a registered liftime using
global constructors and destructors. Currently, this is put at priority
1 which isn't strictly conformant as it will conflict with system
utilities. We now use 101 as this is the loweest suggested for
non-system constructors and will still run before user constructors.
Secondly, there were issues with the CUDA runtime when destructed with a
global destructor. Because the global ones are in any order and
potentially run before other things we were hitting an edge case where
the OpenMP runtime was uninitialized *after* `_dl_fini` was called. This
would result in us erroring when we call into a destroyed `libcuda.so`
instance. using `atexit` is what CUDA / HIP use and it prevents this
from happening. Most everything uses `atexit` except system utilities
and because of the constructor priority it will be unregistered *after*
everything else but not after `_fl_fini`.
Added:
Modified:
clang/test/Driver/linker-wrapper-image.c
llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
Removed:
################################################################################
diff --git a/clang/test/Driver/linker-wrapper-image.c b/clang/test/Driver/linker-wrapper-image.c
index 75475264135224..d01445e3aed04e 100644
--- a/clang/test/Driver/linker-wrapper-image.c
+++ b/clang/test/Driver/linker-wrapper-image.c
@@ -26,11 +26,11 @@
// OPENMP: @.omp_offloading.device_image = internal unnamed_addr constant [[[SIZE:[0-9]+]] x i8] c"\10\FF\10\AD{{.*}}", section ".llvm.offloading", align 8
// OPENMP-NEXT: @.omp_offloading.device_images = internal unnamed_addr constant [1 x %__tgt_device_image] [%__tgt_device_image { ptr getelementptr inbounds ([[[BEGIN:[0-9]+]] x i8], ptr @.omp_offloading.device_image, i64 1, i64 0), ptr getelementptr inbounds ([[[END:[0-9]+]] x i8], ptr @.omp_offloading.device_image, i64 1, i64 0), ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }]
// OPENMP-NEXT: @.omp_offloading.descriptor = internal constant %__tgt_bin_desc { i32 1, ptr @.omp_offloading.device_images, ptr @__start_omp_offloading_entries, ptr @__stop_omp_offloading_entries }
-// OPENMP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.omp_offloading.descriptor_reg, ptr null }]
-// OPENMP-NEXT: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.omp_offloading.descriptor_unreg, ptr null }]
+// OPENMP-NEXT: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.omp_offloading.descriptor_reg, ptr null }]
// OPENMP: define internal void @.omp_offloading.descriptor_reg() section ".text.startup" {
// OPENMP-NEXT: entry:
+// OPENMP-NEXT: %0 = call i32 @atexit(ptr @.omp_offloading.descriptor_unreg)
// OPENMP-NEXT: call void @__tgt_register_lib(ptr @.omp_offloading.descriptor)
// OPENMP-NEXT: ret void
// OPENMP-NEXT: }
@@ -62,7 +62,7 @@
// CUDA-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1180844977, i32 1, ptr @.fatbin_image, ptr null }, section ".nvFatBinSegment", align 8
// CUDA-NEXT: @.cuda.binary_handle = internal global ptr null
-// CUDA: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.cuda.fatbin_reg, ptr null }]
+// CUDA: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.cuda.fatbin_reg, ptr null }]
// CUDA: define internal void @.cuda.fatbin_reg() section ".text.startup" {
// CUDA-NEXT: entry:
@@ -162,7 +162,7 @@
// HIP-NEXT: @.fatbin_wrapper = internal constant %fatbin_wrapper { i32 1212764230, i32 1, ptr @.fatbin_image, ptr null }, section ".hipFatBinSegment", align 8
// HIP-NEXT: @.hip.binary_handle = internal global ptr null
-// HIP: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @.hip.fatbin_reg, ptr null }]
+// HIP: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 101, ptr @.hip.fatbin_reg, ptr null }]
// HIP: define internal void @.hip.fatbin_reg() section ".text.startup" {
// HIP-NEXT: entry:
diff --git a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
index fec1bdbe9d8c74..7241d15ed1c670 100644
--- a/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
+++ b/llvm/lib/Frontend/Offloading/OffloadWrapper.cpp
@@ -186,57 +186,62 @@ GlobalVariable *createBinDesc(Module &M, ArrayRef<ArrayRef<char>> Bufs,
".omp_offloading.descriptor" + Suffix);
}
-void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
- StringRef Suffix) {
+Function *createUnregisterFunction(Module &M, GlobalVariable *BinDesc,
+ StringRef Suffix) {
LLVMContext &C = M.getContext();
auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
- auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
- ".omp_offloading.descriptor_reg" + Suffix, &M);
+ auto *Func =
+ Function::Create(FuncTy, GlobalValue::InternalLinkage,
+ ".omp_offloading.descriptor_unreg" + Suffix, &M);
Func->setSection(".text.startup");
- // Get __tgt_register_lib function declaration.
- auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
- /*isVarArg*/ false);
- FunctionCallee RegFuncC =
- M.getOrInsertFunction("__tgt_register_lib", RegFuncTy);
+ // Get __tgt_unregister_lib function declaration.
+ auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
+ /*isVarArg*/ false);
+ FunctionCallee UnRegFuncC =
+ M.getOrInsertFunction("__tgt_unregister_lib", UnRegFuncTy);
// Construct function body
IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
- Builder.CreateCall(RegFuncC, BinDesc);
+ Builder.CreateCall(UnRegFuncC, BinDesc);
Builder.CreateRetVoid();
- // Add this function to constructors.
- // Set priority to 1 so that __tgt_register_lib is executed AFTER
- // __tgt_register_requires (we want to know what requirements have been
- // asked for before we load a libomptarget plugin so that by the time the
- // plugin is loaded it can report how many devices there are which can
- // satisfy these requirements).
- appendToGlobalCtors(M, Func, /*Priority*/ 1);
+ return Func;
}
-void createUnregisterFunction(Module &M, GlobalVariable *BinDesc,
- StringRef Suffix) {
+void createRegisterFunction(Module &M, GlobalVariable *BinDesc,
+ StringRef Suffix) {
LLVMContext &C = M.getContext();
auto *FuncTy = FunctionType::get(Type::getVoidTy(C), /*isVarArg*/ false);
- auto *Func =
- Function::Create(FuncTy, GlobalValue::InternalLinkage,
- ".omp_offloading.descriptor_unreg" + Suffix, &M);
+ auto *Func = Function::Create(FuncTy, GlobalValue::InternalLinkage,
+ ".omp_offloading.descriptor_reg" + Suffix, &M);
Func->setSection(".text.startup");
- // Get __tgt_unregister_lib function declaration.
- auto *UnRegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
- /*isVarArg*/ false);
- FunctionCallee UnRegFuncC =
- M.getOrInsertFunction("__tgt_unregister_lib", UnRegFuncTy);
+ // Get __tgt_register_lib function declaration.
+ auto *RegFuncTy = FunctionType::get(Type::getVoidTy(C), getBinDescPtrTy(M),
+ /*isVarArg*/ false);
+ FunctionCallee RegFuncC =
+ M.getOrInsertFunction("__tgt_register_lib", RegFuncTy);
+
+ auto *AtExitTy = FunctionType::get(
+ Type::getInt32Ty(C), PointerType::getUnqual(C), /*isVarArg=*/false);
+ FunctionCallee AtExit = M.getOrInsertFunction("atexit", AtExitTy);
+
+ Function *UnregFunc = createUnregisterFunction(M, BinDesc, Suffix);
// Construct function body
IRBuilder<> Builder(BasicBlock::Create(C, "entry", Func));
- Builder.CreateCall(UnRegFuncC, BinDesc);
+
+ // Register the destructors with 'atexit'. This is expected by the CUDA
+ // runtime and ensures that we clean up before dynamic objects are destroyed.
+ // This needs to be done before the runtime is called and registers its own.
+ Builder.CreateCall(AtExit, UnregFunc);
+
+ Builder.CreateCall(RegFuncC, BinDesc);
Builder.CreateRetVoid();
- // Add this function to global destructors.
- // Match priority of __tgt_register_lib
- appendToGlobalDtors(M, Func, /*Priority*/ 1);
+ // Add this function to constructors.
+ appendToGlobalCtors(M, Func, /*Priority=*/101);
}
// struct fatbin_wrapper {
@@ -578,7 +583,7 @@ void createRegisterFatbinFunction(Module &M, GlobalVariable *FatbinDesc,
DtorBuilder.CreateRetVoid();
// Add this function to constructors.
- appendToGlobalCtors(M, CtorFunc, /*Priority*/ 1);
+ appendToGlobalCtors(M, CtorFunc, /*Priority=*/101);
}
} // namespace
@@ -591,7 +596,6 @@ Error offloading::wrapOpenMPBinaries(Module &M, ArrayRef<ArrayRef<char>> Images,
return createStringError(inconvertibleErrorCode(),
"No binary descriptors created.");
createRegisterFunction(M, Desc, Suffix);
- createUnregisterFunction(M, Desc, Suffix);
return Error::success();
}
More information about the llvm-commits
mailing list