[Openmp-commits] [openmp] cadcaf3 - [AMDGPU][Libomptarget] Drop dead code related to g_atl_machine
Pushpinder Singh via Openmp-commits
openmp-commits at lists.llvm.org
Mon Jun 14 22:21:48 PDT 2021
Author: Pushpinder Singh
Date: 2021-06-15T05:21:35Z
New Revision: cadcaf3f46f6ccd3619cb2dd75bf54ed3080f43d
URL: https://github.com/llvm/llvm-project/commit/cadcaf3f46f6ccd3619cb2dd75bf54ed3080f43d
DIFF: https://github.com/llvm/llvm-project/commit/cadcaf3f46f6ccd3619cb2dd75bf54ed3080f43d.diff
LOG: [AMDGPU][Libomptarget] Drop dead code related to g_atl_machine
This patch includes some changes which delete the code accessing
the g_atl_machine global. Some accesses related to memory_pools
still remain.
Reviewed By: JonChesterfield
Differential Revision: https://reviews.llvm.org/D103813
Added:
Modified:
openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
openmp/libomptarget/plugins/amdgpu/impl/machine.h
openmp/libomptarget/plugins/amdgpu/impl/rt.h
openmp/libomptarget/plugins/amdgpu/impl/system.cpp
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
index 791b0b30a4295..63a569e5d12fd 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
@@ -25,9 +25,6 @@ hsa_status_t atmi_interop_hsa_get_symbol_info(
if (!symbol || !var_addr || !var_size)
return HSA_STATUS_ERROR;
- if (DeviceId < 0 ||
- DeviceId >= g_atl_machine.processors<ATLGPUProcessor>().size())
- return HSA_STATUS_ERROR;
// get the symbol info
std::string symbolStr = std::string(symbol);
@@ -58,9 +55,6 @@ hsa_status_t atmi_interop_hsa_get_kernel_info(
if (!kernel_name || !value)
return HSA_STATUS_ERROR;
- if (DeviceId < 0 ||
- DeviceId >= g_atl_machine.processors<ATLGPUProcessor>().size())
- return HSA_STATUS_ERROR;
hsa_status_t status = HSA_STATUS_SUCCESS;
// get the kernel info
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/machine.h b/openmp/libomptarget/plugins/amdgpu/impl/machine.h
index 6c3293e300141..fc4a95b3c0f66 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/machine.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/machine.h
@@ -66,9 +66,6 @@ class ATLMachine {
}
template <typename T> void addProcessor(const T &p);
template <typename T> std::vector<T> &processors();
- template <typename T> size_t processorCount() {
- return processors<T>().size();
- }
private:
std::vector<ATLCPUProcessor> cpu_processors_;
@@ -78,14 +75,4 @@ class ATLMachine {
hsa_amd_memory_pool_t get_memory_pool(const ATLProcessor &proc,
const int mem_id);
-extern ATLMachine g_atl_machine;
-template <typename T> T &get_processor(int dev_id) {
- if (dev_id == -1) {
- // user is asking runtime to pick a device
- // best device of this type? pick 0 for now
- dev_id = 0;
- }
- return g_atl_machine.processors<T>()[dev_id];
-}
-
#endif // SRC_RUNTIME_INCLUDE_MACHINE_H_
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/rt.h b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
index d1e1a9d5ce1d8..90713b7c8f8c6 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/rt.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
@@ -51,7 +51,7 @@ class Runtime final {
// modules
static hsa_status_t RegisterModuleFromMemory(
- void *, size_t, int DeviceId,
+ void *, size_t, hsa_agent_t agent,
hsa_status_t (*on_deserialized_data)(void *data, size_t size,
void *cb_state),
void *cb_state, std::vector<hsa_executable_t> &HSAExecutables);
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
index 02b570cf565a8..dd9d5bfa8af75 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -703,7 +703,7 @@ int populate_kernelArgMD(msgpack::byte_range args_element,
} // namespace
static hsa_status_t get_code_object_custom_metadata(
- void *binary, size_t binSize, int gpu,
+ void *binary, size_t binSize,
std::map<std::string, atl_kernel_info_t> &KernelInfoTable) {
// parse code object with different keys from v2
// also, the kernel name is not the same as the symbol name -- so a
@@ -878,7 +878,7 @@ static hsa_status_t get_code_object_custom_metadata(
}
static hsa_status_t
-populate_InfoTables(hsa_executable_symbol_t symbol, int gpu,
+populate_InfoTables(hsa_executable_symbol_t symbol,
std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
std::map<std::string, atl_symbol_info_t> &SymbolInfoTable) {
hsa_symbol_kind_t type;
@@ -1020,16 +1020,11 @@ populate_InfoTables(hsa_executable_symbol_t symbol, int gpu,
hsa_status_t RegisterModuleFromMemory(
std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
- void *module_bytes, size_t module_size, int gpu,
+ void *module_bytes, size_t module_size, hsa_agent_t agent,
hsa_status_t (*on_deserialized_data)(void *data, size_t size,
void *cb_state),
void *cb_state, std::vector<hsa_executable_t> &HSAExecutables) {
hsa_status_t err;
- assert(gpu >= 0);
-
- DEBUG_PRINT("Trying to load module to GPU-%d\n", gpu);
- ATLGPUProcessor &proc = get_processor<ATLGPUProcessor>(gpu);
- hsa_agent_t agent = proc.agent();
hsa_executable_t executable = {0};
hsa_profile_t agent_profile;
@@ -1058,7 +1053,7 @@ hsa_status_t RegisterModuleFromMemory(
// Some metadata info is not available through ROCr API, so use custom
// code object metadata parsing to collect such metadata info
- err = get_code_object_custom_metadata(module_bytes, module_size, gpu,
+ err = get_code_object_custom_metadata(module_bytes, module_size,
KernelInfoTable);
if (err != HSA_STATUS_SUCCESS) {
DEBUG_PRINT("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
@@ -1116,8 +1111,7 @@ hsa_status_t RegisterModuleFromMemory(
err = hsa::executable_iterate_symbols(
executable,
[&](hsa_executable_t, hsa_executable_symbol_t symbol) -> hsa_status_t {
- return populate_InfoTables(symbol, gpu, KernelInfoTable,
- SymbolInfoTable);
+ return populate_InfoTables(symbol, KernelInfoTable, SymbolInfoTable);
});
if (err != HSA_STATUS_SUCCESS) {
printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index ff8bb0ada9d6a..51ff65c7098e6 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -91,12 +91,23 @@ namespace core {
hsa_status_t RegisterModuleFromMemory(
std::map<std::string, atl_kernel_info_t> &KernelInfo,
std::map<std::string, atl_symbol_info_t> &SymbolInfoTable, void *, size_t,
- int DeviceId,
+ hsa_agent_t agent,
hsa_status_t (*on_deserialized_data)(void *data, size_t size,
void *cb_state),
void *cb_state, std::vector<hsa_executable_t> &HSAExecutables);
}
+namespace hsa {
+template <typename C> hsa_status_t iterate_agents(C cb) {
+ auto L = [](hsa_agent_t agent, void *data) -> hsa_status_t {
+ C *unwrapped = static_cast<C *>(data);
+ return (*unwrapped)(agent);
+ };
+ return hsa_iterate_agents(L, static_cast<void *>(&cb));
+}
+
+} // namespace hsa
+
/// Keep entries table per device
struct FuncOrGblEntryTy {
__tgt_target_table Table;
@@ -244,14 +255,10 @@ struct KernelTy {
/// FIXME: we may need this to be per device and per library.
std::list<KernelTy> KernelsList;
-static std::vector<hsa_agent_t> find_gpu_agents() {
- std::vector<hsa_agent_t> res;
-
- hsa_status_t err = hsa_iterate_agents(
- [](hsa_agent_t agent, void *data) -> hsa_status_t {
- std::vector<hsa_agent_t> *res =
- static_cast<std::vector<hsa_agent_t> *>(data);
+template <typename Callback> static hsa_status_t FindAgents(Callback CB) {
+ hsa_status_t err =
+ hsa::iterate_agents([&](hsa_agent_t agent) -> hsa_status_t {
hsa_device_type_t device_type;
// get_info fails iff HSA runtime not yet initialized
hsa_status_t err =
@@ -260,18 +267,16 @@ static std::vector<hsa_agent_t> find_gpu_agents() {
printf("rtl.cpp: err %d\n", err);
assert(err == HSA_STATUS_SUCCESS);
- if (device_type == HSA_DEVICE_TYPE_GPU) {
- res->push_back(agent);
- }
+ CB(device_type, agent);
return HSA_STATUS_SUCCESS;
- },
- &res);
+ });
// iterate_agents fails iff HSA runtime not yet initialized
- if (print_kernel_trace > 0 && err != HSA_STATUS_SUCCESS)
+ if (print_kernel_trace > 0 && err != HSA_STATUS_SUCCESS) {
printf("rtl.cpp: err %d\n", err);
- assert(err == HSA_STATUS_SUCCESS);
- return res;
+ }
+
+ return err;
}
static void callbackQueue(hsa_status_t status, hsa_queue_t *source,
@@ -346,8 +351,7 @@ hsa_status_t addKernArgPool(hsa_amd_memory_pool_t MemoryPool, void *Data) {
std::pair<hsa_status_t, hsa_amd_memory_pool_t>
FindKernargPool(const std::vector<hsa_agent_t> &HSAAgents) {
std::vector<hsa_amd_memory_pool_t> KernArgPools;
- for (const auto &processor : g_atl_machine.processors<ATLCPUProcessor>()) {
- hsa_agent_t Agent = processor.agent();
+ for (const auto &Agent : HSAAgents) {
hsa_status_t err = HSA_STATUS_SUCCESS;
err = hsa_amd_agent_iterate_memory_pools(
Agent, addKernArgPool, static_cast<void *>(&KernArgPools));
@@ -384,6 +388,9 @@ class RTLDeviceInfoTy {
std::vector<hsa_agent_t> HSAAgents;
std::vector<hsa_queue_t *> HSAQueues; // one per gpu
+ // CPUs
+ std::vector<hsa_agent_t> CPUAgents;
+
// Device properties
std::vector<int> ComputeUnits;
std::vector<int> GroupsPerDevice;
@@ -538,7 +545,16 @@ class RTLDeviceInfoTy {
// Init hostcall soon after initializing ATMI
hostrpc_init();
- HSAAgents = find_gpu_agents();
+ err = FindAgents([&](hsa_device_type_t DeviceType, hsa_agent_t Agent) {
+ if (DeviceType == HSA_DEVICE_TYPE_CPU) {
+ CPUAgents.push_back(Agent);
+ } else {
+ HSAAgents.push_back(Agent);
+ }
+ });
+ if (err != HSA_STATUS_SUCCESS)
+ return;
+
NumberOfDevices = (int)HSAAgents.size();
if (NumberOfDevices == 0) {
@@ -547,8 +563,7 @@ class RTLDeviceInfoTy {
} else {
DP("There are %d devices supporting HSA.\n", NumberOfDevices);
}
-
- std::tie(err, KernArgPool) = core::FindKernargPool(HSAAgents);
+ std::tie(err, KernArgPool) = core::FindKernargPool(CPUAgents);
if (err != HSA_STATUS_SUCCESS) {
DP("Error when reading memory pools\n");
return;
@@ -1104,8 +1119,9 @@ hsa_status_t module_register_from_memory_to_place(
return (*unwrapped)(data, size);
};
return core::RegisterModuleFromMemory(
- KernelInfoTable, SymbolInfoTable, module_bytes, module_size, DeviceId, L,
- static_cast<void *>(&cb), HSAExecutables);
+ KernelInfoTable, SymbolInfoTable, module_bytes, module_size,
+ DeviceInfo.HSAAgents[DeviceId], L, static_cast<void *>(&cb),
+ HSAExecutables);
}
} // namespace
More information about the Openmp-commits
mailing list