[Openmp-commits] [openmp] 9b8b7c1 - [AMDGPU][Libomptarget] Delete g_atl_machine global
Pushpinder Singh via Openmp-commits
openmp-commits at lists.llvm.org
Tue Aug 24 00:59:50 PDT 2021
Author: Pushpinder Singh
Date: 2021-08-24T07:59:40Z
New Revision: 9b8b7c1180eb3606fb08d2cdd9d6d18fae9cb2f5
URL: https://github.com/llvm/llvm-project/commit/9b8b7c1180eb3606fb08d2cdd9d6d18fae9cb2f5
DIFF: https://github.com/llvm/llvm-project/commit/9b8b7c1180eb3606fb08d2cdd9d6d18fae9cb2f5.diff
LOG: [AMDGPU][Libomptarget] Delete g_atl_machine global
With all uses of g_atl_machine gone, this patch deletes the global itself
along with a significant portion of code that is now dead.
This patch depends on D104691 and D104695.
Reviewed By: JonChesterfield
Differential Revision: https://reviews.llvm.org/D104696
Added:
Modified:
openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
openmp/libomptarget/plugins/amdgpu/impl/data.cpp
openmp/libomptarget/plugins/amdgpu/impl/rt.h
openmp/libomptarget/plugins/amdgpu/impl/system.cpp
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
Removed:
openmp/libomptarget/plugins/amdgpu/impl/machine.cpp
openmp/libomptarget/plugins/amdgpu/impl/machine.h
################################################################################
diff --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
index c464d1202735e..51daa4f4f3d82 100644
--- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
@@ -66,7 +66,6 @@ add_library(omptarget.rtl.amdgpu SHARED
impl/atmi_interop_hsa.cpp
impl/data.cpp
impl/get_elf_mach_gfx_name.cpp
- impl/machine.cpp
impl/system.cpp
impl/utils.cpp
impl/msgpack.cpp
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
index edd099f5f963d..c7cd1fe9546cc 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
@@ -50,7 +50,8 @@ struct atmiFreePtrDeletor {
hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
const void *hostSrc, size_t size,
- hsa_agent_t agent) {
+ hsa_agent_t agent,
+ hsa_amd_memory_pool_t MemoryPool) {
hsa_status_t rc = hsa_memory_copy(deviceDest, hostSrc, size);
// hsa_memory_copy sometimes fails in situations where
@@ -61,7 +62,7 @@ hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
}
void *tempHostPtr;
- hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size);
+ hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
if (ret != HSA_STATUS_SUCCESS) {
DEBUG_PRINT("HostMalloc: Unable to alloc %zu bytes for temp scratch\n",
size);
@@ -79,7 +80,8 @@ hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
const void *deviceSrc, size_t size,
- hsa_agent_t agent) {
+ hsa_agent_t agent,
+ hsa_amd_memory_pool_t MemoryPool) {
hsa_status_t rc = hsa_memory_copy(dest, deviceSrc, size);
// hsa_memory_copy sometimes fails in situations where
@@ -90,7 +92,7 @@ hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
}
void *tempHostPtr;
- hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size);
+ hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
if (ret != HSA_STATUS_SUCCESS) {
DEBUG_PRINT("HostMalloc: Unable to alloc %zu bytes for temp scratch\n",
size);
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
index 93adbaca466e3..e4431e768806f 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
@@ -7,11 +7,6 @@
//===----------------------------------------------------------------------===//
#include "atmi_interop_hsa.h"
#include "internal.h"
-#include "machine.h"
-
-// TODO: need to get rid of this as well
-
-extern ATLMachine g_atl_machine;
hsa_status_t atmi_interop_hsa_get_symbol_info(
const std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
index caeff6a2061f3..c00ac8310c562 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
@@ -55,11 +55,13 @@ hsa_status_t atmi_module_register_from_memory_to_place(
hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
const void *hostSrc, size_t size,
- hsa_agent_t agent);
+ hsa_agent_t agent,
+ hsa_amd_memory_pool_t MemoryPool);
hsa_status_t atmi_memcpy_d2h(hsa_signal_t sig, void *hostDest,
const void *deviceSrc, size_t size,
- hsa_agent_t agent);
+ hsa_agent_t agent,
+ hsa_amd_memory_pool_t MemoryPool);
/** @} */
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
index 1e0da87046689..9e65b251576bc 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
@@ -8,7 +8,6 @@
#include "atmi_runtime.h"
#include "hsa_api.h"
#include "internal.h"
-#include "machine.h"
#include "rt.h"
#include <cassert>
#include <stdio.h>
@@ -16,47 +15,17 @@
#include <vector>
using core::TaskImpl;
-extern ATLMachine g_atl_machine;
namespace core {
-namespace {
-ATLProcessor &get_processor_by_mem_place(int DeviceId,
- atmi_devtype_t DeviceType) {
- switch (DeviceType) {
- case ATMI_DEVTYPE_CPU:
- return g_atl_machine.processors<ATLCPUProcessor>()[DeviceId];
- case ATMI_DEVTYPE_GPU:
- return g_atl_machine.processors<ATLGPUProcessor>()[DeviceId];
- }
-}
-
-hsa_amd_memory_pool_t get_memory_pool_by_mem_place(int DeviceId,
- atmi_devtype_t DeviceType) {
- ATLProcessor &proc = get_processor_by_mem_place(DeviceId, DeviceType);
- return get_memory_pool(proc, 0 /*Memory Type (always zero) */);
-}
-} // namespace
+hsa_status_t Runtime::HostMalloc(void **ptr, size_t size,
+ hsa_amd_memory_pool_t MemoryPool) {
+ hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, size, 0, ptr);
+ DEBUG_PRINT("Malloced [CPU %d] %p\n", DeviceId, *ptr);
-hsa_status_t Runtime::DeviceMalloc(void **ptr, size_t size, int DeviceId) {
- return Runtime::Malloc(ptr, size, DeviceId, ATMI_DEVTYPE_GPU);
-}
-
-hsa_status_t Runtime::HostMalloc(void **ptr, size_t size) {
- hsa_status_t Err = Runtime::Malloc(ptr, size, 0, ATMI_DEVTYPE_CPU);
- if (Err == HSA_STATUS_SUCCESS) {
- Err = core::allow_access_to_all_gpu_agents(*ptr);
+ if (err == HSA_STATUS_SUCCESS) {
+ err = core::allow_access_to_all_gpu_agents(*ptr);
}
- return Err;
-}
-
-hsa_status_t Runtime::Malloc(void **ptr, size_t size, int DeviceId,
- atmi_devtype_t DeviceType) {
- hsa_amd_memory_pool_t pool =
- get_memory_pool_by_mem_place(DeviceId, DeviceType);
- hsa_status_t err = hsa_amd_memory_pool_allocate(pool, size, 0, ptr);
- DEBUG_PRINT("Malloced [%s %d] %p\n",
- DeviceType == ATMI_DEVTYPE_CPU ? "CPU" : "GPU", DeviceId, *ptr);
return (err == HSA_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
}
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/machine.cpp b/openmp/libomptarget/plugins/amdgpu/impl/machine.cpp
deleted file mode 100644
index 34e1ae3d7e961..0000000000000
--- a/openmp/libomptarget/plugins/amdgpu/impl/machine.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===--- amdgpu/impl/machine.cpp ---------------------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#include "machine.h"
-#include "atmi_runtime.h"
-#include "hsa_api.h"
-#include "internal.h"
-#include <cassert>
-#include <stdio.h>
-#include <stdlib.h>
-#include <vector>
-
-extern ATLMachine g_atl_machine;
-
-void ATLProcessor::addMemory(const ATLMemory &mem) {
- for (auto &mem_obj : memories_) {
- // if the memory already exists, then just return
- if (mem.memory().handle == mem_obj.memory().handle)
- return;
- }
- memories_.push_back(mem);
-}
-
-const std::vector<ATLMemory> &ATLProcessor::memories() const {
- return memories_;
-}
-
-template <> std::vector<ATLCPUProcessor> &ATLMachine::processors() {
- return cpu_processors_;
-}
-
-template <> std::vector<ATLGPUProcessor> &ATLMachine::processors() {
- return gpu_processors_;
-}
-
-hsa_amd_memory_pool_t get_memory_pool(const ATLProcessor &proc,
- const int mem_id) {
- hsa_amd_memory_pool_t pool;
- const std::vector<ATLMemory> &mems = proc.memories();
- assert(mems.size() && mem_id >= 0 && mem_id < mems.size() &&
- "Invalid memory pools for this processor");
- pool = mems[mem_id].memory();
- return pool;
-}
-
-template <> void ATLMachine::addProcessor(const ATLCPUProcessor &p) {
- cpu_processors_.push_back(p);
-}
-
-template <> void ATLMachine::addProcessor(const ATLGPUProcessor &p) {
- gpu_processors_.push_back(p);
-}
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/machine.h b/openmp/libomptarget/plugins/amdgpu/impl/machine.h
deleted file mode 100644
index d7276a3465199..0000000000000
--- a/openmp/libomptarget/plugins/amdgpu/impl/machine.h
+++ /dev/null
@@ -1,79 +0,0 @@
-//===--- amdgpu/impl/machine.h ------------------------------------ C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef SRC_RUNTIME_INCLUDE_MACHINE_H_
-#define SRC_RUNTIME_INCLUDE_MACHINE_H_
-#include "atmi.h"
-#include "hsa_api.h"
-#include "internal.h"
-#include <vector>
-
-class ATLMemory;
-
-class ATLProcessor {
-public:
- explicit ATLProcessor(hsa_agent_t agent,
- atmi_devtype_t type = ATMI_DEVTYPE_ALL)
- : agent_(agent), type_(type) {
- memories_.clear();
- }
- void addMemory(const ATLMemory &p);
- hsa_agent_t agent() const { return agent_; }
- const std::vector<ATLMemory> &memories() const;
- atmi_devtype_t type() const { return type_; }
-
-protected:
- hsa_agent_t agent_;
- atmi_devtype_t type_;
- std::vector<ATLMemory> memories_;
-};
-
-class ATLCPUProcessor : public ATLProcessor {
-public:
- explicit ATLCPUProcessor(hsa_agent_t agent)
- : ATLProcessor(agent, ATMI_DEVTYPE_CPU) {}
-};
-
-class ATLGPUProcessor : public ATLProcessor {
-public:
- explicit ATLGPUProcessor(hsa_agent_t agent,
- atmi_devtype_t type = ATMI_DEVTYPE_dGPU)
- : ATLProcessor(agent, type) {}
-};
-
-class ATLMemory {
-public:
- ATLMemory(hsa_amd_memory_pool_t pool, ATLProcessor p, atmi_memtype_t t)
- : memory_pool_(pool), processor_(p), type_(t) {}
- hsa_amd_memory_pool_t memory() const { return memory_pool_; }
-
- atmi_memtype_t type() const { return type_; }
-
-private:
- hsa_amd_memory_pool_t memory_pool_;
- ATLProcessor processor_;
- atmi_memtype_t type_;
-};
-
-class ATLMachine {
-public:
- ATLMachine() {
- cpu_processors_.clear();
- gpu_processors_.clear();
- }
- template <typename T> void addProcessor(const T &p);
- template <typename T> std::vector<T> &processors();
-
-private:
- std::vector<ATLCPUProcessor> cpu_processors_;
- std::vector<ATLGPUProcessor> gpu_processors_;
-};
-
-hsa_amd_memory_pool_t get_memory_pool(const ATLProcessor &proc,
- const int mem_id);
-
-#endif // SRC_RUNTIME_INCLUDE_MACHINE_H_
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/rt.h b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
index d161e1f217478..bfebd14f49ed2 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/rt.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
@@ -60,16 +60,12 @@ class Runtime final {
// data
static hsa_status_t Memcpy(hsa_signal_t, void *, const void *, size_t);
static hsa_status_t Memfree(void *);
- static hsa_status_t HostMalloc(void **ptr, size_t size);
- static hsa_status_t DeviceMalloc(void **ptr, size_t size, int DeviceId);
+ static hsa_status_t HostMalloc(void **ptr, size_t size,
+ hsa_amd_memory_pool_t MemoryPool);
int getMaxQueueSize() const { return env_.getMaxQueueSize(); }
int getDebugMode() const { return env_.getDebugMode(); }
-private:
- static hsa_status_t Malloc(void **ptr, size_t size, int DeviceId,
- atmi_devtype_t DeviceType);
-
protected:
Runtime() = default;
~Runtime() = default;
diff --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
index d9cbf461121c4..94c60d1c06156 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -12,7 +12,6 @@
#include <string>
#include "internal.h"
-#include "machine.h"
#include "rt.h"
#include "msgpack.h"
@@ -140,241 +139,8 @@ static const std::map<std::string, KernelArgMD::ValueKind> ArgValueKind = {
{"hidden_hostcall_buffer", KernelArgMD::ValueKind::HiddenHostcallBuffer},
};
-ATLMachine g_atl_machine;
-
namespace core {
-// Implement memory_pool iteration function
-static hsa_status_t get_memory_pool_info(hsa_amd_memory_pool_t memory_pool,
- void *data) {
- ATLProcessor *proc = reinterpret_cast<ATLProcessor *>(data);
- hsa_status_t err = HSA_STATUS_SUCCESS;
- // Check if the memory_pool is allowed to allocate, i.e. do not return group
- // memory
- bool alloc_allowed = false;
- err = hsa_amd_memory_pool_get_info(
- memory_pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
- &alloc_allowed);
- if (err != HSA_STATUS_SUCCESS) {
- printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
- "Alloc allowed in memory pool check", get_error_string(err));
- return err;
- }
- if (alloc_allowed) {
- uint32_t global_flag = 0;
- err = hsa_amd_memory_pool_get_info(
- memory_pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flag);
- if (err != HSA_STATUS_SUCCESS) {
- printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
- "Get memory pool info", get_error_string(err));
- return err;
- }
- if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & global_flag) {
- ATLMemory new_mem(memory_pool, *proc, ATMI_MEMTYPE_FINE_GRAINED);
- proc->addMemory(new_mem);
- } else {
- ATLMemory new_mem(memory_pool, *proc, ATMI_MEMTYPE_COARSE_GRAINED);
- proc->addMemory(new_mem);
- }
- }
-
- return err;
-}
-
-static hsa_status_t get_agent_info(hsa_agent_t agent, void *data) {
- hsa_status_t err = HSA_STATUS_SUCCESS;
- hsa_device_type_t device_type;
- err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
- if (err != HSA_STATUS_SUCCESS) {
- printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
- "Get device type info", get_error_string(err));
- return err;
- }
- switch (device_type) {
- case HSA_DEVICE_TYPE_CPU: {
- ATLCPUProcessor new_proc(agent);
- err = hsa_amd_agent_iterate_memory_pools(agent, get_memory_pool_info,
- &new_proc);
- if (err != HSA_STATUS_SUCCESS) {
- printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
- "Iterate all memory pools", get_error_string(err));
- return err;
- }
- g_atl_machine.addProcessor(new_proc);
- } break;
- case HSA_DEVICE_TYPE_GPU: {
- hsa_profile_t profile;
- err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &profile);
- if (err != HSA_STATUS_SUCCESS) {
- printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
- "Query the agent profile", get_error_string(err));
- return err;
- }
- atmi_devtype_t gpu_type;
- gpu_type =
- (profile == HSA_PROFILE_FULL) ? ATMI_DEVTYPE_iGPU : ATMI_DEVTYPE_dGPU;
- ATLGPUProcessor new_proc(agent, gpu_type);
- err = hsa_amd_agent_iterate_memory_pools(agent, get_memory_pool_info,
- &new_proc);
- if (err != HSA_STATUS_SUCCESS) {
- printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
- "Iterate all memory pools", get_error_string(err));
- return err;
- }
- g_atl_machine.addProcessor(new_proc);
- } break;
- case HSA_DEVICE_TYPE_DSP: {
- err = HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
- } break;
- }
-
- return err;
-}
-
-static hsa_status_t init_compute_and_memory() {
- hsa_status_t err;
-
- /* Iterate over the agents and pick the gpu agent */
- err = hsa_iterate_agents(get_agent_info, NULL);
- if (err == HSA_STATUS_INFO_BREAK) {
- err = HSA_STATUS_SUCCESS;
- }
- if (err != HSA_STATUS_SUCCESS) {
- printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "Getting a gpu agent",
- get_error_string(err));
- return err;
- }
-
- /* Init all devices or individual device types? */
- std::vector<ATLCPUProcessor> &cpu_procs =
- g_atl_machine.processors<ATLCPUProcessor>();
- std::vector<ATLGPUProcessor> &gpu_procs =
- g_atl_machine.processors<ATLGPUProcessor>();
- /* For CPU memory pools, add other devices that can access them directly
- * or indirectly */
- for (auto &cpu_proc : cpu_procs) {
- for (auto &cpu_mem : cpu_proc.memories()) {
- hsa_amd_memory_pool_t pool = cpu_mem.memory();
- for (auto &gpu_proc : gpu_procs) {
- hsa_agent_t agent = gpu_proc.agent();
- hsa_amd_memory_pool_access_t access;
- hsa_amd_agent_memory_pool_get_info(
- agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
- if (access != 0) {
- // this means not NEVER, but could be YES or NO
- // add this memory pool to the proc
- gpu_proc.addMemory(cpu_mem);
- }
- }
- }
- }
-
- /* FIXME: are the below combinations of procs and memory pools needed?
- * all to all compare procs with their memory pools and add those memory
- * pools that are accessible by the target procs */
- for (auto &gpu_proc : gpu_procs) {
- for (auto &gpu_mem : gpu_proc.memories()) {
- hsa_amd_memory_pool_t pool = gpu_mem.memory();
- for (auto &cpu_proc : cpu_procs) {
- hsa_agent_t agent = cpu_proc.agent();
- hsa_amd_memory_pool_access_t access;
- hsa_amd_agent_memory_pool_get_info(
- agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
- if (access != 0) {
- // this means not NEVER, but could be YES or NO
- // add this memory pool to the proc
- cpu_proc.addMemory(gpu_mem);
- }
- }
- }
- }
-
- size_t num_procs = cpu_procs.size() + gpu_procs.size();
- int num_iGPUs = 0;
- int num_dGPUs = 0;
- for (uint32_t i = 0; i < gpu_procs.size(); i++) {
- if (gpu_procs[i].type() == ATMI_DEVTYPE_iGPU)
- num_iGPUs++;
- else
- num_dGPUs++;
- }
- assert(num_iGPUs + num_dGPUs == gpu_procs.size() &&
- "Number of dGPUs and iGPUs do not add up");
- DEBUG_PRINT("CPU Agents: %lu\n", cpu_procs.size());
- DEBUG_PRINT("iGPU Agents: %d\n", num_iGPUs);
- DEBUG_PRINT("dGPU Agents: %d\n", num_dGPUs);
- DEBUG_PRINT("GPU Agents: %lu\n", gpu_procs.size());
-
- int cpus_begin = 0;
- int cpus_end = cpu_procs.size();
- int gpus_begin = cpu_procs.size();
- int gpus_end = cpu_procs.size() + gpu_procs.size();
- int proc_index = 0;
- for (int i = cpus_begin; i < cpus_end; i++) {
- std::vector<ATLMemory> memories = cpu_procs[proc_index].memories();
- int fine_memories_size = 0;
- int coarse_memories_size = 0;
- DEBUG_PRINT("CPU memory types:\t");
- for (auto &memory : memories) {
- atmi_memtype_t type = memory.type();
- if (type == ATMI_MEMTYPE_FINE_GRAINED) {
- fine_memories_size++;
- DEBUG_PRINT("Fine\t");
- } else {
- coarse_memories_size++;
- DEBUG_PRINT("Coarse\t");
- }
- }
- DEBUG_PRINT("\nFine Memories : %d", fine_memories_size);
- DEBUG_PRINT("\tCoarse Memories : %d\n", coarse_memories_size);
- proc_index++;
- }
- proc_index = 0;
- for (int i = gpus_begin; i < gpus_end; i++) {
- std::vector<ATLMemory> memories = gpu_procs[proc_index].memories();
- int fine_memories_size = 0;
- int coarse_memories_size = 0;
- DEBUG_PRINT("GPU memory types:\t");
- for (auto &memory : memories) {
- atmi_memtype_t type = memory.type();
- if (type == ATMI_MEMTYPE_FINE_GRAINED) {
- fine_memories_size++;
- DEBUG_PRINT("Fine\t");
- } else {
- coarse_memories_size++;
- DEBUG_PRINT("Coarse\t");
- }
- }
- DEBUG_PRINT("\nFine Memories : %d", fine_memories_size);
- DEBUG_PRINT("\tCoarse Memories : %d\n", coarse_memories_size);
- proc_index++;
- }
- if (num_procs > 0)
- return HSA_STATUS_SUCCESS;
- else
- return HSA_STATUS_ERROR_NOT_INITIALIZED;
-}
-
-hsa_status_t init_hsa() {
- DEBUG_PRINT("Initializing HSA...");
- hsa_status_t err = hsa_init();
- if (err != HSA_STATUS_SUCCESS) {
- return err;
- }
-
- err = init_compute_and_memory();
- if (err != HSA_STATUS_SUCCESS)
- return err;
- if (err != HSA_STATUS_SUCCESS) {
- printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
- "After initializing compute and memory", get_error_string(err));
- return err;
- }
-
- DEBUG_PRINT("done\n");
- return HSA_STATUS_SUCCESS;
-}
-
hsa_status_t callbackEvent(const hsa_amd_event_t *event, void *data) {
#if (ROCM_VERSION_MAJOR >= 3) || \
(ROCM_VERSION_MAJOR >= 2 && ROCM_VERSION_MINOR >= 3)
@@ -417,7 +183,7 @@ hsa_status_t callbackEvent(const hsa_amd_event_t *event, void *data) {
hsa_status_t atl_init_gpu_context() {
hsa_status_t err;
- err = init_hsa();
+ err = hsa_init();
if (err != HSA_STATUS_SUCCESS)
return HSA_STATUS_ERROR;
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 31adc72e9b929..897f830ea87ba 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -32,7 +32,6 @@
#include "Debug.h"
#include "get_elf_mach_gfx_name.h"
-#include "machine.h"
#include "omptargetplugin.h"
#include "print_tracing.h"
@@ -508,7 +507,8 @@ class RTLDeviceInfoTy {
llvm::omp::AMDGPUGridValues.GV_Default_WG_Size;
using MemcpyFunc = hsa_status_t (*)(hsa_signal_t, void *, const void *,
- size_t size, hsa_agent_t);
+ size_t size, hsa_agent_t,
+ hsa_amd_memory_pool_t);
hsa_status_t freesignalpool_memcpy(void *dest, const void *src, size_t size,
MemcpyFunc Func, int32_t deviceId) {
hsa_agent_t agent = HSAAgents[deviceId];
@@ -516,7 +516,7 @@ class RTLDeviceInfoTy {
if (s.handle == 0) {
return HSA_STATUS_ERROR;
}
- hsa_status_t r = Func(s, dest, src, size, agent);
+ hsa_status_t r = Func(s, dest, src, size, agent, HostFineGrainedMemoryPool);
FreeSignalPool.push(s);
return r;
}
@@ -1413,7 +1413,8 @@ struct device_environment {
static hsa_status_t atmi_calloc(void **ret_ptr, size_t size, int DeviceId) {
uint64_t rounded = 4 * ((size + 3) / 4);
void *ptr;
- hsa_status_t err = core::Runtime::DeviceMalloc(&ptr, rounded, DeviceId);
+ hsa_amd_memory_pool_t MemoryPool = DeviceInfo.getDeviceMemoryPool(DeviceId);
+ hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, rounded, 0, &ptr);
if (err != HSA_STATUS_SUCCESS) {
return err;
}
@@ -1807,7 +1808,8 @@ void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *, int32_t kind) {
return NULL;
}
- hsa_status_t err = core::Runtime::DeviceMalloc(&ptr, size, device_id);
+ hsa_amd_memory_pool_t MemoryPool = DeviceInfo.getDeviceMemoryPool(device_id);
+ hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, size, 0, &ptr);
DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", size,
(long long unsigned)(Elf64_Addr)ptr);
ptr = (err == HSA_STATUS_SUCCESS) ? ptr : NULL;
More information about the Openmp-commits mailing list