[Openmp-commits] [openmp] 9b8b7c1 - [AMDGPU][Libomptarget] Delete g_atl_machine global

Pushpinder Singh via Openmp-commits openmp-commits at lists.llvm.org
Tue Aug 24 00:59:50 PDT 2021


Author: Pushpinder Singh
Date: 2021-08-24T07:59:40Z
New Revision: 9b8b7c1180eb3606fb08d2cdd9d6d18fae9cb2f5

URL: https://github.com/llvm/llvm-project/commit/9b8b7c1180eb3606fb08d2cdd9d6d18fae9cb2f5
DIFF: https://github.com/llvm/llvm-project/commit/9b8b7c1180eb3606fb08d2cdd9d6d18fae9cb2f5.diff

LOG: [AMDGPU][Libomptarget] Delete g_atl_machine global

With uses of g_atl_machine gone, a significant portion of dead
code has been removed.

This patch depends on D104691 and D104695.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D104696

Added: 
    

Modified: 
    openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
    openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
    openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
    openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
    openmp/libomptarget/plugins/amdgpu/impl/data.cpp
    openmp/libomptarget/plugins/amdgpu/impl/rt.h
    openmp/libomptarget/plugins/amdgpu/impl/system.cpp
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Removed: 
    openmp/libomptarget/plugins/amdgpu/impl/machine.cpp
    openmp/libomptarget/plugins/amdgpu/impl/machine.h


################################################################################
diff  --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
index c464d1202735e..51daa4f4f3d82 100644
--- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
@@ -66,7 +66,6 @@ add_library(omptarget.rtl.amdgpu SHARED
       impl/atmi_interop_hsa.cpp
       impl/data.cpp
       impl/get_elf_mach_gfx_name.cpp
-      impl/machine.cpp
       impl/system.cpp
       impl/utils.cpp
       impl/msgpack.cpp

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
index edd099f5f963d..c7cd1fe9546cc 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi.cpp
@@ -50,7 +50,8 @@ struct atmiFreePtrDeletor {
 
 hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
                              const void *hostSrc, size_t size,
-                             hsa_agent_t agent) {
+                             hsa_agent_t agent,
+                             hsa_amd_memory_pool_t MemoryPool) {
   hsa_status_t rc = hsa_memory_copy(deviceDest, hostSrc, size);
 
   // hsa_memory_copy sometimes fails in situations where
@@ -61,7 +62,7 @@ hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
   }
 
   void *tempHostPtr;
-  hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size);
+  hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
   if (ret != HSA_STATUS_SUCCESS) {
     DEBUG_PRINT("HostMalloc: Unable to alloc %zu bytes for temp scratch\n",
                 size);
@@ -79,7 +80,8 @@ hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
 
 hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
                              const void *deviceSrc, size_t size,
-                             hsa_agent_t agent) {
+                             hsa_agent_t agent,
+                             hsa_amd_memory_pool_t MemoryPool) {
   hsa_status_t rc = hsa_memory_copy(dest, deviceSrc, size);
 
   // hsa_memory_copy sometimes fails in situations where
@@ -90,7 +92,7 @@ hsa_status_t atmi_memcpy_d2h(hsa_signal_t signal, void *dest,
   }
 
   void *tempHostPtr;
-  hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size);
+  hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
   if (ret != HSA_STATUS_SUCCESS) {
     DEBUG_PRINT("HostMalloc: Unable to alloc %zu bytes for temp scratch\n",
                 size);

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
index 93adbaca466e3..e4431e768806f 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_interop_hsa.cpp
@@ -7,11 +7,6 @@
 //===----------------------------------------------------------------------===//
 #include "atmi_interop_hsa.h"
 #include "internal.h"
-#include "machine.h"
-
-// TODO: need to get rid of this as well
-
-extern ATLMachine g_atl_machine;
 
 hsa_status_t atmi_interop_hsa_get_symbol_info(
     const std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
index caeff6a2061f3..c00ac8310c562 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/atmi_runtime.h
@@ -55,11 +55,13 @@ hsa_status_t atmi_module_register_from_memory_to_place(
 
 hsa_status_t atmi_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
                              const void *hostSrc, size_t size,
-                             hsa_agent_t agent);
+                             hsa_agent_t agent,
+                             hsa_amd_memory_pool_t MemoryPool);
 
 hsa_status_t atmi_memcpy_d2h(hsa_signal_t sig, void *hostDest,
                              const void *deviceSrc, size_t size,
-                             hsa_agent_t agent);
+                             hsa_agent_t agent,
+                             hsa_amd_memory_pool_t MemoryPool);
 
 /** @} */
 

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
index 1e0da87046689..9e65b251576bc 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
@@ -8,7 +8,6 @@
 #include "atmi_runtime.h"
 #include "hsa_api.h"
 #include "internal.h"
-#include "machine.h"
 #include "rt.h"
 #include <cassert>
 #include <stdio.h>
@@ -16,47 +15,17 @@
 #include <vector>
 
 using core::TaskImpl;
-extern ATLMachine g_atl_machine;
 
 namespace core {
 
-namespace {
-ATLProcessor &get_processor_by_mem_place(int DeviceId,
-                                         atmi_devtype_t DeviceType) {
-  switch (DeviceType) {
-  case ATMI_DEVTYPE_CPU:
-    return g_atl_machine.processors<ATLCPUProcessor>()[DeviceId];
-  case ATMI_DEVTYPE_GPU:
-    return g_atl_machine.processors<ATLGPUProcessor>()[DeviceId];
-  }
-}
-
-hsa_amd_memory_pool_t get_memory_pool_by_mem_place(int DeviceId,
-                                                   atmi_devtype_t DeviceType) {
-  ATLProcessor &proc = get_processor_by_mem_place(DeviceId, DeviceType);
-  return get_memory_pool(proc, 0 /*Memory Type (always zero) */);
-}
-} // namespace
+hsa_status_t Runtime::HostMalloc(void **ptr, size_t size,
+                                 hsa_amd_memory_pool_t MemoryPool) {
+  hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, size, 0, ptr);
+  DEBUG_PRINT("Malloced [CPU] %p\n", *ptr);
 
-hsa_status_t Runtime::DeviceMalloc(void **ptr, size_t size, int DeviceId) {
-  return Runtime::Malloc(ptr, size, DeviceId, ATMI_DEVTYPE_GPU);
-}
-
-hsa_status_t Runtime::HostMalloc(void **ptr, size_t size) {
-  hsa_status_t Err = Runtime::Malloc(ptr, size, 0, ATMI_DEVTYPE_CPU);
-  if (Err == HSA_STATUS_SUCCESS) {
-    Err = core::allow_access_to_all_gpu_agents(*ptr);
+  if (err == HSA_STATUS_SUCCESS) {
+    err = core::allow_access_to_all_gpu_agents(*ptr);
   }
-  return Err;
-}
-
-hsa_status_t Runtime::Malloc(void **ptr, size_t size, int DeviceId,
-                             atmi_devtype_t DeviceType) {
-  hsa_amd_memory_pool_t pool =
-      get_memory_pool_by_mem_place(DeviceId, DeviceType);
-  hsa_status_t err = hsa_amd_memory_pool_allocate(pool, size, 0, ptr);
-  DEBUG_PRINT("Malloced [%s %d] %p\n",
-              DeviceType == ATMI_DEVTYPE_CPU ? "CPU" : "GPU", DeviceId, *ptr);
   return (err == HSA_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
 }
 

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/machine.cpp b/openmp/libomptarget/plugins/amdgpu/impl/machine.cpp
deleted file mode 100644
index 34e1ae3d7e961..0000000000000
--- a/openmp/libomptarget/plugins/amdgpu/impl/machine.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===--- amdgpu/impl/machine.cpp ---------------------------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#include "machine.h"
-#include "atmi_runtime.h"
-#include "hsa_api.h"
-#include "internal.h"
-#include <cassert>
-#include <stdio.h>
-#include <stdlib.h>
-#include <vector>
-
-extern ATLMachine g_atl_machine;
-
-void ATLProcessor::addMemory(const ATLMemory &mem) {
-  for (auto &mem_obj : memories_) {
-    // if the memory already exists, then just return
-    if (mem.memory().handle == mem_obj.memory().handle)
-      return;
-  }
-  memories_.push_back(mem);
-}
-
-const std::vector<ATLMemory> &ATLProcessor::memories() const {
-  return memories_;
-}
-
-template <> std::vector<ATLCPUProcessor> &ATLMachine::processors() {
-  return cpu_processors_;
-}
-
-template <> std::vector<ATLGPUProcessor> &ATLMachine::processors() {
-  return gpu_processors_;
-}
-
-hsa_amd_memory_pool_t get_memory_pool(const ATLProcessor &proc,
-                                      const int mem_id) {
-  hsa_amd_memory_pool_t pool;
-  const std::vector<ATLMemory> &mems = proc.memories();
-  assert(mems.size() && mem_id >= 0 && mem_id < mems.size() &&
-         "Invalid memory pools for this processor");
-  pool = mems[mem_id].memory();
-  return pool;
-}
-
-template <> void ATLMachine::addProcessor(const ATLCPUProcessor &p) {
-  cpu_processors_.push_back(p);
-}
-
-template <> void ATLMachine::addProcessor(const ATLGPUProcessor &p) {
-  gpu_processors_.push_back(p);
-}

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/machine.h b/openmp/libomptarget/plugins/amdgpu/impl/machine.h
deleted file mode 100644
index d7276a3465199..0000000000000
--- a/openmp/libomptarget/plugins/amdgpu/impl/machine.h
+++ /dev/null
@@ -1,79 +0,0 @@
-//===--- amdgpu/impl/machine.h ------------------------------------ C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef SRC_RUNTIME_INCLUDE_MACHINE_H_
-#define SRC_RUNTIME_INCLUDE_MACHINE_H_
-#include "atmi.h"
-#include "hsa_api.h"
-#include "internal.h"
-#include <vector>
-
-class ATLMemory;
-
-class ATLProcessor {
-public:
-  explicit ATLProcessor(hsa_agent_t agent,
-                        atmi_devtype_t type = ATMI_DEVTYPE_ALL)
-      : agent_(agent), type_(type) {
-    memories_.clear();
-  }
-  void addMemory(const ATLMemory &p);
-  hsa_agent_t agent() const { return agent_; }
-  const std::vector<ATLMemory> &memories() const;
-  atmi_devtype_t type() const { return type_; }
-
-protected:
-  hsa_agent_t agent_;
-  atmi_devtype_t type_;
-  std::vector<ATLMemory> memories_;
-};
-
-class ATLCPUProcessor : public ATLProcessor {
-public:
-  explicit ATLCPUProcessor(hsa_agent_t agent)
-      : ATLProcessor(agent, ATMI_DEVTYPE_CPU) {}
-};
-
-class ATLGPUProcessor : public ATLProcessor {
-public:
-  explicit ATLGPUProcessor(hsa_agent_t agent,
-                           atmi_devtype_t type = ATMI_DEVTYPE_dGPU)
-      : ATLProcessor(agent, type) {}
-};
-
-class ATLMemory {
-public:
-  ATLMemory(hsa_amd_memory_pool_t pool, ATLProcessor p, atmi_memtype_t t)
-      : memory_pool_(pool), processor_(p), type_(t) {}
-  hsa_amd_memory_pool_t memory() const { return memory_pool_; }
-
-  atmi_memtype_t type() const { return type_; }
-
-private:
-  hsa_amd_memory_pool_t memory_pool_;
-  ATLProcessor processor_;
-  atmi_memtype_t type_;
-};
-
-class ATLMachine {
-public:
-  ATLMachine() {
-    cpu_processors_.clear();
-    gpu_processors_.clear();
-  }
-  template <typename T> void addProcessor(const T &p);
-  template <typename T> std::vector<T> &processors();
-
-private:
-  std::vector<ATLCPUProcessor> cpu_processors_;
-  std::vector<ATLGPUProcessor> gpu_processors_;
-};
-
-hsa_amd_memory_pool_t get_memory_pool(const ATLProcessor &proc,
-                                      const int mem_id);
-
-#endif // SRC_RUNTIME_INCLUDE_MACHINE_H_

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/rt.h b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
index d161e1f217478..bfebd14f49ed2 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/rt.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
@@ -60,16 +60,12 @@ class Runtime final {
   // data
   static hsa_status_t Memcpy(hsa_signal_t, void *, const void *, size_t);
   static hsa_status_t Memfree(void *);
-  static hsa_status_t HostMalloc(void **ptr, size_t size);
-  static hsa_status_t DeviceMalloc(void **ptr, size_t size, int DeviceId);
+  static hsa_status_t HostMalloc(void **ptr, size_t size,
+                                 hsa_amd_memory_pool_t MemoryPool);
 
   int getMaxQueueSize() const { return env_.getMaxQueueSize(); }
   int getDebugMode() const { return env_.getDebugMode(); }
 
-private:
-  static hsa_status_t Malloc(void **ptr, size_t size, int DeviceId,
-                             atmi_devtype_t DeviceType);
-
 protected:
   Runtime() = default;
   ~Runtime() = default;

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
index d9cbf461121c4..94c60d1c06156 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -12,7 +12,6 @@
 #include <string>
 
 #include "internal.h"
-#include "machine.h"
 #include "rt.h"
 
 #include "msgpack.h"
@@ -140,241 +139,8 @@ static const std::map<std::string, KernelArgMD::ValueKind> ArgValueKind = {
     {"hidden_hostcall_buffer", KernelArgMD::ValueKind::HiddenHostcallBuffer},
 };
 
-ATLMachine g_atl_machine;
-
 namespace core {
 
-// Implement memory_pool iteration function
-static hsa_status_t get_memory_pool_info(hsa_amd_memory_pool_t memory_pool,
-                                         void *data) {
-  ATLProcessor *proc = reinterpret_cast<ATLProcessor *>(data);
-  hsa_status_t err = HSA_STATUS_SUCCESS;
-  // Check if the memory_pool is allowed to allocate, i.e. do not return group
-  // memory
-  bool alloc_allowed = false;
-  err = hsa_amd_memory_pool_get_info(
-      memory_pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
-      &alloc_allowed);
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-           "Alloc allowed in memory pool check", get_error_string(err));
-    return err;
-  }
-  if (alloc_allowed) {
-    uint32_t global_flag = 0;
-    err = hsa_amd_memory_pool_get_info(
-        memory_pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flag);
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Get memory pool info", get_error_string(err));
-      return err;
-    }
-    if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & global_flag) {
-      ATLMemory new_mem(memory_pool, *proc, ATMI_MEMTYPE_FINE_GRAINED);
-      proc->addMemory(new_mem);
-    } else {
-      ATLMemory new_mem(memory_pool, *proc, ATMI_MEMTYPE_COARSE_GRAINED);
-      proc->addMemory(new_mem);
-    }
-  }
-
-  return err;
-}
-
-static hsa_status_t get_agent_info(hsa_agent_t agent, void *data) {
-  hsa_status_t err = HSA_STATUS_SUCCESS;
-  hsa_device_type_t device_type;
-  err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-           "Get device type info", get_error_string(err));
-    return err;
-  }
-  switch (device_type) {
-  case HSA_DEVICE_TYPE_CPU: {
-    ATLCPUProcessor new_proc(agent);
-    err = hsa_amd_agent_iterate_memory_pools(agent, get_memory_pool_info,
-                                             &new_proc);
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Iterate all memory pools", get_error_string(err));
-      return err;
-    }
-    g_atl_machine.addProcessor(new_proc);
-  } break;
-  case HSA_DEVICE_TYPE_GPU: {
-    hsa_profile_t profile;
-    err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, &profile);
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Query the agent profile", get_error_string(err));
-      return err;
-    }
-    atmi_devtype_t gpu_type;
-    gpu_type =
-        (profile == HSA_PROFILE_FULL) ? ATMI_DEVTYPE_iGPU : ATMI_DEVTYPE_dGPU;
-    ATLGPUProcessor new_proc(agent, gpu_type);
-    err = hsa_amd_agent_iterate_memory_pools(agent, get_memory_pool_info,
-                                             &new_proc);
-    if (err != HSA_STATUS_SUCCESS) {
-      printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-             "Iterate all memory pools", get_error_string(err));
-      return err;
-    }
-    g_atl_machine.addProcessor(new_proc);
-  } break;
-  case HSA_DEVICE_TYPE_DSP: {
-    err = HSA_STATUS_ERROR_INVALID_CODE_OBJECT;
-  } break;
-  }
-
-  return err;
-}
-
-static hsa_status_t init_compute_and_memory() {
-  hsa_status_t err;
-
-  /* Iterate over the agents and pick the gpu agent */
-  err = hsa_iterate_agents(get_agent_info, NULL);
-  if (err == HSA_STATUS_INFO_BREAK) {
-    err = HSA_STATUS_SUCCESS;
-  }
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__, "Getting a gpu agent",
-           get_error_string(err));
-    return err;
-  }
-
-  /* Init all devices or individual device types? */
-  std::vector<ATLCPUProcessor> &cpu_procs =
-      g_atl_machine.processors<ATLCPUProcessor>();
-  std::vector<ATLGPUProcessor> &gpu_procs =
-      g_atl_machine.processors<ATLGPUProcessor>();
-  /* For CPU memory pools, add other devices that can access them directly
-   * or indirectly */
-  for (auto &cpu_proc : cpu_procs) {
-    for (auto &cpu_mem : cpu_proc.memories()) {
-      hsa_amd_memory_pool_t pool = cpu_mem.memory();
-      for (auto &gpu_proc : gpu_procs) {
-        hsa_agent_t agent = gpu_proc.agent();
-        hsa_amd_memory_pool_access_t access;
-        hsa_amd_agent_memory_pool_get_info(
-            agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
-        if (access != 0) {
-          // this means not NEVER, but could be YES or NO
-          // add this memory pool to the proc
-          gpu_proc.addMemory(cpu_mem);
-        }
-      }
-    }
-  }
-
-  /* FIXME: are the below combinations of procs and memory pools needed?
-   * all to all compare procs with their memory pools and add those memory
-   * pools that are accessible by the target procs */
-  for (auto &gpu_proc : gpu_procs) {
-    for (auto &gpu_mem : gpu_proc.memories()) {
-      hsa_amd_memory_pool_t pool = gpu_mem.memory();
-      for (auto &cpu_proc : cpu_procs) {
-        hsa_agent_t agent = cpu_proc.agent();
-        hsa_amd_memory_pool_access_t access;
-        hsa_amd_agent_memory_pool_get_info(
-            agent, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
-        if (access != 0) {
-          // this means not NEVER, but could be YES or NO
-          // add this memory pool to the proc
-          cpu_proc.addMemory(gpu_mem);
-        }
-      }
-    }
-  }
-
-  size_t num_procs = cpu_procs.size() + gpu_procs.size();
-  int num_iGPUs = 0;
-  int num_dGPUs = 0;
-  for (uint32_t i = 0; i < gpu_procs.size(); i++) {
-    if (gpu_procs[i].type() == ATMI_DEVTYPE_iGPU)
-      num_iGPUs++;
-    else
-      num_dGPUs++;
-  }
-  assert(num_iGPUs + num_dGPUs == gpu_procs.size() &&
-         "Number of dGPUs and iGPUs do not add up");
-  DEBUG_PRINT("CPU Agents: %lu\n", cpu_procs.size());
-  DEBUG_PRINT("iGPU Agents: %d\n", num_iGPUs);
-  DEBUG_PRINT("dGPU Agents: %d\n", num_dGPUs);
-  DEBUG_PRINT("GPU Agents: %lu\n", gpu_procs.size());
-
-  int cpus_begin = 0;
-  int cpus_end = cpu_procs.size();
-  int gpus_begin = cpu_procs.size();
-  int gpus_end = cpu_procs.size() + gpu_procs.size();
-  int proc_index = 0;
-  for (int i = cpus_begin; i < cpus_end; i++) {
-    std::vector<ATLMemory> memories = cpu_procs[proc_index].memories();
-    int fine_memories_size = 0;
-    int coarse_memories_size = 0;
-    DEBUG_PRINT("CPU memory types:\t");
-    for (auto &memory : memories) {
-      atmi_memtype_t type = memory.type();
-      if (type == ATMI_MEMTYPE_FINE_GRAINED) {
-        fine_memories_size++;
-        DEBUG_PRINT("Fine\t");
-      } else {
-        coarse_memories_size++;
-        DEBUG_PRINT("Coarse\t");
-      }
-    }
-    DEBUG_PRINT("\nFine Memories : %d", fine_memories_size);
-    DEBUG_PRINT("\tCoarse Memories : %d\n", coarse_memories_size);
-    proc_index++;
-  }
-  proc_index = 0;
-  for (int i = gpus_begin; i < gpus_end; i++) {
-    std::vector<ATLMemory> memories = gpu_procs[proc_index].memories();
-    int fine_memories_size = 0;
-    int coarse_memories_size = 0;
-    DEBUG_PRINT("GPU memory types:\t");
-    for (auto &memory : memories) {
-      atmi_memtype_t type = memory.type();
-      if (type == ATMI_MEMTYPE_FINE_GRAINED) {
-        fine_memories_size++;
-        DEBUG_PRINT("Fine\t");
-      } else {
-        coarse_memories_size++;
-        DEBUG_PRINT("Coarse\t");
-      }
-    }
-    DEBUG_PRINT("\nFine Memories : %d", fine_memories_size);
-    DEBUG_PRINT("\tCoarse Memories : %d\n", coarse_memories_size);
-    proc_index++;
-  }
-  if (num_procs > 0)
-    return HSA_STATUS_SUCCESS;
-  else
-    return HSA_STATUS_ERROR_NOT_INITIALIZED;
-}
-
-hsa_status_t init_hsa() {
-  DEBUG_PRINT("Initializing HSA...");
-  hsa_status_t err = hsa_init();
-  if (err != HSA_STATUS_SUCCESS) {
-    return err;
-  }
-
-  err = init_compute_and_memory();
-  if (err != HSA_STATUS_SUCCESS)
-    return err;
-  if (err != HSA_STATUS_SUCCESS) {
-    printf("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-           "After initializing compute and memory", get_error_string(err));
-    return err;
-  }
-
-  DEBUG_PRINT("done\n");
-  return HSA_STATUS_SUCCESS;
-}
-
 hsa_status_t callbackEvent(const hsa_amd_event_t *event, void *data) {
 #if (ROCM_VERSION_MAJOR >= 3) ||                                               \
     (ROCM_VERSION_MAJOR >= 2 && ROCM_VERSION_MINOR >= 3)
@@ -417,7 +183,7 @@ hsa_status_t callbackEvent(const hsa_amd_event_t *event, void *data) {
 
 hsa_status_t atl_init_gpu_context() {
   hsa_status_t err;
-  err = init_hsa();
+  err = hsa_init();
   if (err != HSA_STATUS_SUCCESS)
     return HSA_STATUS_ERROR;
 

diff  --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 31adc72e9b929..897f830ea87ba 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -32,7 +32,6 @@
 
 #include "Debug.h"
 #include "get_elf_mach_gfx_name.h"
-#include "machine.h"
 #include "omptargetplugin.h"
 #include "print_tracing.h"
 
@@ -508,7 +507,8 @@ class RTLDeviceInfoTy {
       llvm::omp::AMDGPUGridValues.GV_Default_WG_Size;
 
   using MemcpyFunc = hsa_status_t (*)(hsa_signal_t, void *, const void *,
-                                      size_t size, hsa_agent_t);
+                                      size_t size, hsa_agent_t,
+                                      hsa_amd_memory_pool_t);
   hsa_status_t freesignalpool_memcpy(void *dest, const void *src, size_t size,
                                      MemcpyFunc Func, int32_t deviceId) {
     hsa_agent_t agent = HSAAgents[deviceId];
@@ -516,7 +516,7 @@ class RTLDeviceInfoTy {
     if (s.handle == 0) {
       return HSA_STATUS_ERROR;
     }
-    hsa_status_t r = Func(s, dest, src, size, agent);
+    hsa_status_t r = Func(s, dest, src, size, agent, HostFineGrainedMemoryPool);
     FreeSignalPool.push(s);
     return r;
   }
@@ -1413,7 +1413,8 @@ struct device_environment {
 static hsa_status_t atmi_calloc(void **ret_ptr, size_t size, int DeviceId) {
   uint64_t rounded = 4 * ((size + 3) / 4);
   void *ptr;
-  hsa_status_t err = core::Runtime::DeviceMalloc(&ptr, rounded, DeviceId);
+  hsa_amd_memory_pool_t MemoryPool = DeviceInfo.getDeviceMemoryPool(DeviceId);
+  hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, rounded, 0, &ptr);
   if (err != HSA_STATUS_SUCCESS) {
     return err;
   }
@@ -1807,7 +1808,8 @@ void *__tgt_rtl_data_alloc(int device_id, int64_t size, void *, int32_t kind) {
     return NULL;
   }
 
-  hsa_status_t err = core::Runtime::DeviceMalloc(&ptr, size, device_id);
+  hsa_amd_memory_pool_t MemoryPool = DeviceInfo.getDeviceMemoryPool(device_id);
+  hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, size, 0, &ptr);
   DP("Tgt alloc data %ld bytes, (tgt:%016llx).\n", size,
      (long long unsigned)(Elf64_Addr)ptr);
   ptr = (err == HSA_STATUS_SUCCESS) ? ptr : NULL;


        


More information about the Openmp-commits mailing list