[Openmp-commits] [openmp] 292cb11 - [Libomptarget] Revert changes to AMDGPU plugin destructors
Joseph Huber via Openmp-commits
openmp-commits at lists.llvm.org
Fri Sep 16 04:56:03 PDT 2022
Author: Joseph Huber
Date: 2022-09-16T06:55:51-05:00
New Revision: 292cb114b0a35da5e35eb856c29deff577c54210
URL: https://github.com/llvm/llvm-project/commit/292cb114b0a35da5e35eb856c29deff577c54210
DIFF: https://github.com/llvm/llvm-project/commit/292cb114b0a35da5e35eb856c29deff577c54210.diff
LOG: [Libomptarget] Revert changes to AMDGPU plugin destructors
These patches exposed a lot of problems in the AMD toolchain. Rather
than keep it broken we should revert it to its old semi-functional
state. This will prevent us from using device destructors but should
remove some new bugs. In the future this interface should be changed
once these problems are addressed more correctly.
This reverts commit ed0f21811544320f829124efbb6a38ee12eb9155.
This reverts commit 2b7203a35972e98b8521f92d2791043dc539ae88.
Fixes #57536
Reviewed By: jdoerfert
Differential Revision: https://reviews.llvm.org/D133997
Added:
Modified:
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
openmp/libomptarget/src/rtl.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 3fcb8a74dc3b6..e00b445695a95 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -214,6 +214,9 @@ struct KernelArgPool {
};
pthread_mutex_t KernelArgPool::Mutex = PTHREAD_MUTEX_INITIALIZER;
+std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>>
+ KernelArgPoolMap;
+
/// Use a single entity to encode a kernel and a set of flags
struct KernelTy {
llvm::omp::OMPTgtExecModeFlags ExecutionMode;
@@ -225,9 +228,7 @@ struct KernelTy {
KernelTy(llvm::omp::OMPTgtExecModeFlags ExecutionMode, int16_t ConstWgSize,
int32_t DeviceId, void *CallStackAddr, const char *Name,
uint32_t KernargSegmentSize,
- hsa_amd_memory_pool_t &KernArgMemoryPool,
- std::unordered_map<std::string, std::unique_ptr<KernelArgPool>>
- &KernelArgPoolMap)
+ hsa_amd_memory_pool_t &KernArgMemoryPool)
: ExecutionMode(ExecutionMode), ConstWGSize(ConstWgSize),
DeviceId(DeviceId), CallStackAddr(CallStackAddr), Name(Name) {
DP("Construct kernelinfo: ExecMode %d\n", ExecutionMode);
@@ -241,6 +242,10 @@ struct KernelTy {
}
};
+/// List that contains all the kernels.
+/// FIXME: we may need this to be per device and per library.
+std::list<KernelTy> KernelsList;
+
template <typename Callback> static hsa_status_t findAgents(Callback CB) {
hsa_status_t Err =
@@ -455,12 +460,6 @@ class RTLDeviceInfoTy : HSALifetime {
int NumberOfDevices = 0;
- /// List that contains all the kernels.
- /// FIXME: we may need this to be per device and per library.
- std::list<KernelTy> KernelsList;
- std::unordered_map<std::string /*kernel*/, std::unique_ptr<KernelArgPool>>
- KernelArgPoolMap;
-
// GPU devices
std::vector<hsa_agent_t> HSAAgents;
std::vector<HSAQueueScheduler> HSAQueueSchedulers; // one per gpu
@@ -862,6 +861,7 @@ class RTLDeviceInfoTy : HSALifetime {
"Unexpected device id!");
FuncGblEntries[DeviceId].emplace_back();
FuncOrGblEntryTy &E = FuncGblEntries[DeviceId].back();
+ // KernelArgPoolMap.clear();
E.Entries.clear();
E.Table.EntriesBegin = E.Table.EntriesEnd = 0;
}
@@ -1117,8 +1117,10 @@ class RTLDeviceInfoTy : HSALifetime {
pthread_mutex_t SignalPoolT::mutex = PTHREAD_MUTEX_INITIALIZER;
-static RTLDeviceInfoTy *DeviceInfoState = nullptr;
-static RTLDeviceInfoTy &DeviceInfo() { return *DeviceInfoState; }
+// Putting accesses to DeviceInfo global behind a function call prior
+// to changing to use init_plugin/deinit_plugin calls
+static RTLDeviceInfoTy DeviceInfoState;
+static RTLDeviceInfoTy &DeviceInfo() { return DeviceInfoState; }
namespace {
@@ -1459,9 +1461,8 @@ int32_t runRegionLocked(int32_t DeviceId, void *TgtEntryPtr, void **TgtArgs,
KernelArgPool *ArgPool = nullptr;
void *KernArg = nullptr;
{
- auto It =
- DeviceInfo().KernelArgPoolMap.find(std::string(KernelInfo->Name));
- if (It != DeviceInfo().KernelArgPoolMap.end()) {
+ auto It = KernelArgPoolMap.find(std::string(KernelInfo->Name));
+ if (It != KernelArgPoolMap.end()) {
ArgPool = (It->second).get();
}
}
@@ -1940,20 +1941,6 @@ bool IsImageCompatibleWithEnv(const char *ImgInfo, std::string EnvInfo) {
}
extern "C" {
-
-int32_t __tgt_rtl_init_plugin() {
- DeviceInfoState = new RTLDeviceInfoTy;
- return (DeviceInfoState && DeviceInfoState->ConstructionSucceeded)
- ? OFFLOAD_SUCCESS
- : OFFLOAD_FAIL;
-}
-
-int32_t __tgt_rtl_deinit_plugin() {
- if (DeviceInfoState)
- delete DeviceInfoState;
- return OFFLOAD_SUCCESS;
-}
-
int32_t __tgt_rtl_is_valid_binary(__tgt_device_image *Image) {
return elfMachineIdIsAmdgcn(Image);
}
@@ -1985,6 +1972,9 @@ int32_t __tgt_rtl_is_valid_binary_info(__tgt_device_image *image,
return true;
}
+int32_t __tgt_rtl_init_plugin() { return OFFLOAD_SUCCESS; }
+int32_t __tgt_rtl_deinit_plugin() { return OFFLOAD_SUCCESS; }
+
int __tgt_rtl_number_of_devices() {
// If the construction failed, no methods are safe to call
if (DeviceInfo().ConstructionSucceeded) {
@@ -2524,12 +2514,11 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t DeviceId,
}
check("Loading computation property", Err);
- DeviceInfo().KernelsList.push_back(
- KernelTy(ExecModeVal, WGSizeVal, DeviceId, CallStackAddr, E->name,
- KernargSegmentSize, DeviceInfo().KernArgPool,
- DeviceInfo().KernelArgPoolMap));
+ KernelsList.push_back(KernelTy(ExecModeVal, WGSizeVal, DeviceId,
+ CallStackAddr, E->name, KernargSegmentSize,
+ DeviceInfo().KernArgPool));
__tgt_offload_entry Entry = *E;
- Entry.addr = (void *)&DeviceInfo().KernelsList.back();
+ Entry.addr = (void *)&KernelsList.back();
DeviceInfo().addOffloadEntry(DeviceId, Entry);
DP("Entry point %ld maps to %s\n", E - HostBegin, E->name);
}
diff --git a/openmp/libomptarget/src/rtl.cpp b/openmp/libomptarget/src/rtl.cpp
index 7fdc47d2560b9..70b8ad799db41 100644
--- a/openmp/libomptarget/src/rtl.cpp
+++ b/openmp/libomptarget/src/rtl.cpp
@@ -65,23 +65,6 @@ __attribute__((constructor(101))) void init() {
__attribute__((destructor(101))) void deinit() {
DP("Deinit target library!\n");
-
- for (auto *R : PM->RTLs.UsedRTLs) {
- // Plugins can either destroy their local state using global variables
- // or attribute(destructor) functions or by implementing deinit_plugin
- // The hazard with plugin local destructors is they may be called before
- // or after this destructor. If the plugin is destroyed using global
- // state before this library finishes calling into it the plugin is
- // likely to crash. If good fortune means the plugin outlives this
- // library then there may be no crash.
- // Using deinit_plugin and no global destructors from the plugin works.
- if (R->deinit_plugin) {
- if (R->deinit_plugin() != OFFLOAD_SUCCESS) {
- DP("Failure deinitializing RTL %s!\n", R->RTLName.c_str());
- }
- }
- }
-
delete PM;
if (ProfileTraceFile) {
@@ -579,6 +562,13 @@ void RTLsTy::unregisterLib(__tgt_bin_desc *Desc) {
PM->TblMapMtx.unlock();
// TODO: Write some RTL->unload_image(...) function?
+ for (auto *R : UsedRTLs) {
+ if (R->deinit_plugin) {
+ if (R->deinit_plugin() != OFFLOAD_SUCCESS) {
+ DP("Failure deinitializing RTL %s!\n", R->RTLName.c_str());
+ }
+ }
+ }
DP("Done unregistering library!\n");
}
More information about the Openmp-commits
mailing list