[Openmp-commits] [openmp] 8cf93a3 - [libomptarget][amdgpu] Destruct HSA queues
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Sun Sep 26 07:34:42 PDT 2021
Author: Jon Chesterfield
Date: 2021-09-26T15:34:21+01:00
New Revision: 8cf93a35d4b873b5e50c152d00adfc3701c679ea
URL: https://github.com/llvm/llvm-project/commit/8cf93a35d4b873b5e50c152d00adfc3701c679ea
DIFF: https://github.com/llvm/llvm-project/commit/8cf93a35d4b873b5e50c152d00adfc3701c679ea.diff
LOG: [libomptarget][amdgpu] Destruct HSA queues
Store queues in unique_ptr so they are destroyed when the global DeviceInfo is. Currently they leak which raises an assert in debug builds of hsa.
Reviewed By: pdhaliwal
Differential Revision: https://reviews.llvm.org/D109511
Added:
Modified:
openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp
openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp
index 3aa749c40463a..466e3ee19feb7 100644
--- a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.cpp
@@ -29,6 +29,7 @@ DLWRAP(hsa_signal_store_relaxed, 2);
DLWRAP(hsa_signal_store_screlease, 2);
DLWRAP(hsa_signal_wait_scacquire, 5);
DLWRAP(hsa_queue_create, 8);
+DLWRAP(hsa_queue_destroy, 1);
DLWRAP(hsa_queue_load_read_index_scacquire, 1);
DLWRAP(hsa_queue_add_write_index_relaxed, 2);
DLWRAP(hsa_memory_copy, 3);
diff --git a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h
index daad4a0dd31b2..54359c2e3dab9 100644
--- a/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h
+++ b/openmp/libomptarget/plugins/amdgpu/dynamic_hsa/hsa.h
@@ -142,6 +142,8 @@ hsa_status_t hsa_queue_create(hsa_agent_t agent, uint32_t size,
void *data, uint32_t private_segment_size,
uint32_t group_segment_size, hsa_queue_t **queue);
+hsa_status_t hsa_queue_destroy(hsa_queue_t *queue);
+
uint64_t hsa_queue_load_read_index_scacquire(const hsa_queue_t *queue);
uint64_t hsa_queue_add_write_index_relaxed(const hsa_queue_t *queue,
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index f72719ad676d0..e08b4b28f2438 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -451,6 +451,17 @@ class RTLDeviceInfoTy {
HSALifetime HSA; // First field => constructed first and destructed last
std::vector<std::list<FuncOrGblEntryTy>> FuncGblEntries;
+ struct QueueDeleter {
+ void operator()(hsa_queue_t *Q) {
+ if (Q) {
+ hsa_status_t Err = hsa_queue_destroy(Q);
+ if (Err != HSA_STATUS_SUCCESS) {
+ DP("Error destroying hsa queue: %s\n", get_error_string(Err));
+ }
+ }
+ }
+ };
+
public:
// load binary populates symbol tables and mutates various global state
// run uses those symbol tables
@@ -460,7 +471,8 @@ class RTLDeviceInfoTy {
// GPU devices
std::vector<hsa_agent_t> HSAAgents;
- std::vector<hsa_queue_t *> HSAQueues; // one per gpu
+ std::vector<std::unique_ptr<hsa_queue_t, QueueDeleter>>
+ HSAQueues; // one per gpu
// CPUs
std::vector<hsa_agent_t> CPUAgents;
@@ -773,10 +785,6 @@ class RTLDeviceInfoTy {
return;
}
- for (int i = 0; i < NumberOfDevices; i++) {
- HSAQueues[i] = nullptr;
- }
-
for (int i = 0; i < NumberOfDevices; i++) {
uint32_t queue_size = 0;
{
@@ -792,12 +800,16 @@ class RTLDeviceInfoTy {
}
}
- hsa_status_t rc = hsa_queue_create(
- HSAAgents[i], queue_size, HSA_QUEUE_TYPE_MULTI, callbackQueue, NULL,
- UINT32_MAX, UINT32_MAX, &HSAQueues[i]);
- if (rc != HSA_STATUS_SUCCESS) {
- DP("Failed to create HSA queue %d\n", i);
- return;
+ {
+ hsa_queue_t *Q = nullptr;
+ hsa_status_t rc =
+ hsa_queue_create(HSAAgents[i], queue_size, HSA_QUEUE_TYPE_MULTI,
+ callbackQueue, NULL, UINT32_MAX, UINT32_MAX, &Q);
+ if (rc != HSA_STATUS_SUCCESS) {
+ DP("Failed to create HSA queue %d\n", i);
+ return;
+ }
+ HSAQueues[i].reset(Q);
}
deviceStateStore[i] = {nullptr, 0};
@@ -2149,7 +2161,7 @@ int32_t __tgt_rtl_run_target_team_region_locked(
// Run on the device.
{
- hsa_queue_t *queue = DeviceInfo.HSAQueues[device_id];
+ hsa_queue_t *queue = DeviceInfo.HSAQueues[device_id].get();
if (!queue) {
return OFFLOAD_FAIL;
}
More information about the Openmp-commits
mailing list