[Openmp-commits] [openmp] 05ba9ff - [libomptarget][amdgpu] Refactor memory pool collection
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Fri Oct 1 06:58:08 PDT 2021
Author: Jon Chesterfield
Date: 2021-10-01T14:58:01+01:00
New Revision: 05ba9ff6a6d243a07bd8adbe70c066c9f6ddc591
URL: https://github.com/llvm/llvm-project/commit/05ba9ff6a6d243a07bd8adbe70c066c9f6ddc591
DIFF: https://github.com/llvm/llvm-project/commit/05ba9ff6a6d243a07bd8adbe70c066c9f6ddc591.diff
LOG: [libomptarget][amdgpu] Refactor memory pool collection
Added:
Modified:
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 1d7980d694fa5..c4e49c7a8020d 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -314,7 +314,7 @@ hsa_status_t isValidMemoryPool(hsa_amd_memory_pool_t MemoryPool) {
return (AllocAllowed && Size > 0) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}
-hsa_status_t addKernArgPool(hsa_amd_memory_pool_t MemoryPool, void *Data) {
+hsa_status_t addMemoryPool(hsa_amd_memory_pool_t MemoryPool, void *Data) {
std::vector<hsa_amd_memory_pool_t> *Result =
static_cast<std::vector<hsa_amd_memory_pool_t> *>(Data);
@@ -323,66 +323,10 @@ hsa_status_t addKernArgPool(hsa_amd_memory_pool_t MemoryPool, void *Data) {
return err;
}
- uint32_t GlobalFlags = 0;
- err = hsa_amd_memory_pool_get_info(
- MemoryPool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &GlobalFlags);
- if (err != HSA_STATUS_SUCCESS) {
- DP("Get memory pool info failed: %s\n", get_error_string(err));
- return err;
- }
-
- if ((GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) &&
- (GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT)) {
- Result->push_back(MemoryPool);
- }
-
- return HSA_STATUS_SUCCESS;
-}
-
-template <typename AccumulatorFunc>
-hsa_status_t collectMemoryPools(const std::vector<hsa_agent_t> &Agents,
- AccumulatorFunc Func) {
- for (int DeviceId = 0; DeviceId < Agents.size(); DeviceId++) {
- hsa_status_t Err = hsa::amd_agent_iterate_memory_pools(
- Agents[DeviceId], [&](hsa_amd_memory_pool_t MemoryPool) {
- hsa_status_t Err;
- if ((Err = isValidMemoryPool(MemoryPool)) != HSA_STATUS_SUCCESS) {
- DP("Skipping memory pool: %s\n", get_error_string(Err));
- } else
- Func(MemoryPool, DeviceId);
- return HSA_STATUS_SUCCESS;
- });
-
- if (Err != HSA_STATUS_SUCCESS) {
- DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
- "Iterate all memory pools", get_error_string(Err));
- return Err;
- }
- }
-
+ Result->push_back(MemoryPool);
return HSA_STATUS_SUCCESS;
}
-std::pair<hsa_status_t, hsa_amd_memory_pool_t>
-FindKernargPool(const std::vector<hsa_agent_t> &HSAAgents) {
- std::vector<hsa_amd_memory_pool_t> KernArgPools;
- for (const auto &Agent : HSAAgents) {
- hsa_status_t err = HSA_STATUS_SUCCESS;
- err = hsa_amd_agent_iterate_memory_pools(
- Agent, addKernArgPool, static_cast<void *>(&KernArgPools));
- if (err != HSA_STATUS_SUCCESS) {
- DP("addKernArgPool returned %s, continuing\n", get_error_string(err));
- }
- }
-
- if (KernArgPools.empty()) {
- DP("Unable to find any valid kernarg pool\n");
- return {HSA_STATUS_ERROR, hsa_amd_memory_pool_t{}};
- }
-
- return {HSA_STATUS_SUCCESS, KernArgPools[0]};
-}
-
} // namespace
} // namespace core
@@ -620,49 +564,71 @@ class RTLDeviceInfoTy {
return HSA_STATUS_SUCCESS;
}
- hsa_status_t addHostMemoryPool(hsa_amd_memory_pool_t MemoryPool,
- int DeviceId) {
- uint32_t GlobalFlags = 0;
- hsa_status_t Err = hsa_amd_memory_pool_get_info(
- MemoryPool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &GlobalFlags);
+ hsa_status_t setupDevicePools(const std::vector<hsa_agent_t> &Agents) {
+ for (int DeviceId = 0; DeviceId < Agents.size(); DeviceId++) {
+ hsa_status_t Err = hsa::amd_agent_iterate_memory_pools(
+ Agents[DeviceId], [&](hsa_amd_memory_pool_t MemoryPool) {
+ hsa_status_t ValidStatus = core::isValidMemoryPool(MemoryPool);
+ if (ValidStatus != HSA_STATUS_SUCCESS) {
+ DP("Alloc allowed in memory pool check failed: %s\n",
+ get_error_string(ValidStatus));
+ return HSA_STATUS_SUCCESS;
+ }
+ return addDeviceMemoryPool(MemoryPool, DeviceId);
+ });
- if (Err != HSA_STATUS_SUCCESS) {
- return Err;
+ if (Err != HSA_STATUS_SUCCESS) {
+ DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
+ "Iterate all memory pools", get_error_string(Err));
+ return Err;
+ }
}
+ return HSA_STATUS_SUCCESS;
+ }
- uint32_t Size;
- Err = hsa_amd_memory_pool_get_info(MemoryPool,
- HSA_AMD_MEMORY_POOL_INFO_SIZE, &Size);
- if (Err != HSA_STATUS_SUCCESS) {
- return Err;
- }
+ hsa_status_t setupHostMemoryPools(std::vector<hsa_agent_t> &Agents) {
+ std::vector<hsa_amd_memory_pool_t> HostPools;
- if (GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED &&
- Size > 0) {
- HostFineGrainedMemoryPool = MemoryPool;
+ // collect all the "valid" pools for all the given agents.
+ for (const auto &Agent : Agents) {
+ hsa_status_t Err = hsa_amd_agent_iterate_memory_pools(
+ Agent, core::addMemoryPool, static_cast<void *>(&HostPools));
+ if (Err != HSA_STATUS_SUCCESS) {
+ DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
+ "Iterate all memory pools", get_error_string(Err));
+ return Err;
+ }
}
- return HSA_STATUS_SUCCESS;
- }
+ // We need two fine-grained pools.
+ // 1. One with kernarg flag set for storing kernel arguments
+ // 2. Second for host allocations
+ bool FineGrainedMemoryPoolSet = false;
+ bool KernArgPoolSet = false;
+ for (const auto &MemoryPool : HostPools) {
+ hsa_status_t Err = HSA_STATUS_SUCCESS;
+ uint32_t GlobalFlags = 0;
+ Err = hsa_amd_memory_pool_get_info(
+ MemoryPool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &GlobalFlags);
+ if (Err != HSA_STATUS_SUCCESS) {
+ DP("Get memory pool info failed: %s\n", get_error_string(Err));
+ return Err;
+ }
- hsa_status_t setupMemoryPools() {
- using namespace std::placeholders;
- hsa_status_t Err;
- Err = core::collectMemoryPools(
- CPUAgents, std::bind(&RTLDeviceInfoTy::addHostMemoryPool, this, _1, _2));
- if (Err != HSA_STATUS_SUCCESS) {
- DP("HSA error in collecting memory pools for CPU: %s\n",
- get_error_string(Err));
- return Err;
- }
- Err = core::collectMemoryPools(
- HSAAgents, std::bind(&RTLDeviceInfoTy::addDeviceMemoryPool, this, _1, _2));
- if (Err != HSA_STATUS_SUCCESS) {
- DP("HSA error in collecting memory pools for offload devices: %s\n",
- get_error_string(Err));
- return Err;
+ if (GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED) {
+ if (GlobalFlags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT) {
+ KernArgPool = MemoryPool;
+ KernArgPoolSet = true;
+ }
+ HostFineGrainedMemoryPool = MemoryPool;
+ FineGrainedMemoryPoolSet = true;
+ }
}
- return HSA_STATUS_SUCCESS;
+
+ if (FineGrainedMemoryPoolSet && KernArgPoolSet)
+ return HSA_STATUS_SUCCESS;
+
+ return HSA_STATUS_ERROR;
}
hsa_amd_memory_pool_t getDeviceMemoryPool(int DeviceId) {
@@ -731,11 +697,6 @@ class RTLDeviceInfoTy {
} else {
DP("There are %d devices supporting HSA.\n", NumberOfDevices);
}
- std::tie(err, KernArgPool) = core::FindKernargPool(CPUAgents);
- if (err != HSA_STATUS_SUCCESS) {
- DP("Error when reading memory pools\n");
- return;
- }
// Init the device info
HSAQueues.resize(NumberOfDevices);
@@ -753,9 +714,15 @@ class RTLDeviceInfoTy {
DeviceCoarseGrainedMemoryPools.resize(NumberOfDevices);
DeviceFineGrainedMemoryPools.resize(NumberOfDevices);
- err = setupMemoryPools();
+ err = setupDevicePools(HSAAgents);
+ if (err != HSA_STATUS_SUCCESS) {
+ DP("Setup for Device Memory Pools failed\n");
+ return;
+ }
+
+ err = setupHostMemoryPools(CPUAgents);
if (err != HSA_STATUS_SUCCESS) {
- DP("Error when setting up memory pools");
+ DP("Setup for Host Memory Pools failed\n");
return;
}
More information about the Openmp-commits mailing list