[Openmp-commits] [openmp] 3153bdd - [libomptarget][amdgpu] Drop env variables

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Thu Sep 2 03:02:57 PDT 2021


Author: Jon Chesterfield
Date: 2021-09-02T11:02:39+01:00
New Revision: 3153bdd547c393b6b58e7cfaa5a40b246bf4218e

URL: https://github.com/llvm/llvm-project/commit/3153bdd547c393b6b58e7cfaa5a40b246bf4218e
DIFF: https://github.com/llvm/llvm-project/commit/3153bdd547c393b6b58e7cfaa5a40b246bf4218e.diff

LOG: [libomptarget][amdgpu] Drop env variables

Use the same debug print as the rest of libomptarget plugins with
the same environment control. Also drop the max queue size debugging hook as
I don't believe it is still in use, can bring it back near the rest of the env
handling in rtl.cpp if someone objects.

That makes most of rt.h and all of utils.cpp unused. Clean that up and simplify
control flow in a couple of places.

Behaviour change is that debug prints that used to use the old environment
variable now use the new one and print in slightly different format, and the
removal of the max queue size variable.

Reviewed By: pdhaliwal

Differential Revision: https://reviews.llvm.org/D108784

Added: 
    

Modified: 
    openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
    openmp/libomptarget/plugins/amdgpu/impl/data.cpp
    openmp/libomptarget/plugins/amdgpu/impl/impl.cpp
    openmp/libomptarget/plugins/amdgpu/impl/internal.h
    openmp/libomptarget/plugins/amdgpu/impl/rt.h
    openmp/libomptarget/plugins/amdgpu/impl/system.cpp
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Removed: 
    openmp/libomptarget/plugins/amdgpu/impl/utils.cpp


################################################################################
diff  --git a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
index 72cca2691b5cf..29887c280d755 100644
--- a/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/amdgpu/CMakeLists.txt
@@ -67,7 +67,6 @@ add_library(omptarget.rtl.amdgpu SHARED
       impl/data.cpp
       impl/get_elf_mach_gfx_name.cpp
       impl/system.cpp
-      impl/utils.cpp
       impl/msgpack.cpp
       src/rtl.cpp
       ${LIBOMPTARGET_EXTRA_SOURCE}

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
index 7cd7e4d3437e3..67942a8942f34 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/data.cpp
@@ -17,23 +17,21 @@
 using core::TaskImpl;
 
 namespace core {
-
-hsa_status_t Runtime::HostMalloc(void **ptr, size_t size,
-                                 hsa_amd_memory_pool_t MemoryPool) {
+namespace Runtime {
+hsa_status_t HostMalloc(void **ptr, size_t size,
+                        hsa_amd_memory_pool_t MemoryPool) {
   hsa_status_t err = hsa_amd_memory_pool_allocate(MemoryPool, size, 0, ptr);
-  DEBUG_PRINT("Malloced %p\n", *ptr);
-
+  DP("Malloced %p\n", *ptr);
   if (err == HSA_STATUS_SUCCESS) {
     err = core::allow_access_to_all_gpu_agents(*ptr);
   }
-  return (err == HSA_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
+  return err;
 }
 
-hsa_status_t Runtime::Memfree(void *ptr) {
+hsa_status_t Memfree(void *ptr) {
   hsa_status_t err = hsa_amd_memory_pool_free(ptr);
-  DEBUG_PRINT("Freed %p\n", ptr);
-
-  return (err == HSA_STATUS_SUCCESS) ? HSA_STATUS_SUCCESS : HSA_STATUS_ERROR;
+  DP("Freed %p\n", ptr);
+  return err;
 }
-
+} // namespace Runtime
 } // namespace core

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp b/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp
index e3f9f71a3068a..7f841aaa8f0d2 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/impl.cpp
@@ -5,8 +5,8 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
-#include "impl_runtime.h"
 #include "hsa_api.h"
+#include "impl_runtime.h"
 #include "internal.h"
 #include "rt.h"
 #include <memory>
@@ -32,7 +32,7 @@ static hsa_status_t invoke_hsa_copy(hsa_signal_t sig, void *dest,
   hsa_signal_value_t got = init;
   while (got == init) {
     got = hsa_signal_wait_scacquire(sig, HSA_SIGNAL_CONDITION_NE, init,
-                                    UINT64_MAX, ATMI_WAIT_STATE);
+                                    UINT64_MAX, HSA_WAIT_STATE_BLOCKED);
   }
 
   if (got != success) {
@@ -64,8 +64,7 @@ hsa_status_t impl_memcpy_h2d(hsa_signal_t signal, void *deviceDest,
   void *tempHostPtr;
   hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
   if (ret != HSA_STATUS_SUCCESS) {
-    DEBUG_PRINT("HostMalloc: Unable to alloc %zu bytes for temp scratch\n",
-                size);
+    DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size);
     return ret;
   }
   std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr);
@@ -94,8 +93,7 @@ hsa_status_t impl_memcpy_d2h(hsa_signal_t signal, void *dest,
   void *tempHostPtr;
   hsa_status_t ret = core::Runtime::HostMalloc(&tempHostPtr, size, MemoryPool);
   if (ret != HSA_STATUS_SUCCESS) {
-    DEBUG_PRINT("HostMalloc: Unable to alloc %zu bytes for temp scratch\n",
-                size);
+    DP("HostMalloc: Unable to alloc %zu bytes for temp scratch\n", size);
     return ret;
   }
   std::unique_ptr<void, implFreePtrDeletor> del(tempHostPtr);

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/internal.h b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
index e5cf047256b70..8bca165dd5aee 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/internal.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/internal.h
@@ -24,7 +24,12 @@
 #include "hsa_api.h"
 
 #include "impl_runtime.h"
-#include "rt.h"
+
+#ifndef TARGET_NAME
+#error "Missing TARGET_NAME macro"
+#endif
+#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
+#include "Debug.h"
 
 #define MAX_NUM_KERNELS (1024 * 16)
 
@@ -41,34 +46,6 @@ typedef struct impl_implicit_args_s {
   unsigned long kernarg_template_ptr;
 } impl_implicit_args_t;
 
-extern "C" {
-
-#ifdef DEBUG
-#define DEBUG_PRINT(fmt, ...)                                                  \
-  if (core::Runtime::getInstance().getDebugMode()) {                           \
-    fprintf(stderr, "[%s:%d] " fmt, __FILE__, __LINE__, ##__VA_ARGS__);        \
-  }
-#else
-#define DEBUG_PRINT(...)                                                       \
-  do {                                                                         \
-  } while (false)
-#endif
-
-#ifndef HSA_RUNTIME_INC_HSA_H_
-typedef struct hsa_signal_s {
-  uint64_t handle;
-} hsa_signal_t;
-#endif
-
-}
-
-/* ---------------------------------------------------------------------------------
- * Simulated CPU Data Structures and API
- * ---------------------------------------------------------------------------------
- */
-
-#define ATMI_WAIT_STATE HSA_WAIT_STATE_BLOCKED
-
 // ---------------------- Kernel Start -------------
 typedef struct atl_kernel_info_s {
   uint64_t kernel_object;
@@ -110,7 +87,7 @@ struct SignalPoolT {
       state.pop();
       hsa_status_t rc = hsa_signal_destroy(signal);
       if (rc != HSA_STATUS_SUCCESS) {
-        DEBUG_PRINT("Signal pool destruction failed\n");
+        DP("Signal pool destruction failed\n");
       }
     }
   }
@@ -183,6 +160,10 @@ bool handle_group_signal(hsa_signal_value_t value, void *arg);
 hsa_status_t allow_access_to_all_gpu_agents(void *ptr);
 } // namespace core
 
-const char *get_error_string(hsa_status_t err);
+inline const char *get_error_string(hsa_status_t err) {
+  const char *res;
+  hsa_status_t rc = hsa_status_string(err, &res);
+  return (rc == HSA_STATUS_SUCCESS) ? res : "HSA_STATUS UNKNOWN.";
+}
 
 #endif // SRC_RUNTIME_INCLUDE_INTERNAL_H_

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/rt.h b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
index 9d98735320dc7..8b09a8466cccb 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/rt.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/rt.h
@@ -8,74 +8,26 @@
 #ifndef SRC_RUNTIME_INCLUDE_RT_H_
 #define SRC_RUNTIME_INCLUDE_RT_H_
 
-#include "impl_runtime.h"
 #include "hsa_api.h"
+#include "impl_runtime.h"
+#include "internal.h"
+
 #include <string>
 
 namespace core {
-
-#define DEFAULT_MAX_QUEUE_SIZE 4096
-#define DEFAULT_DEBUG_MODE 0
-class Environment {
-public:
-  Environment()
-      : max_queue_size_(DEFAULT_MAX_QUEUE_SIZE),
-        debug_mode_(DEFAULT_DEBUG_MODE) {
-    GetEnvAll();
-  }
-
-  void GetEnvAll();
-
-  int getMaxQueueSize() const { return max_queue_size_; }
-  int getDebugMode() const { return debug_mode_; }
-
-private:
-  std::string GetEnv(const char *name) {
-    char *env = getenv(name);
-    std::string ret;
-    if (env) {
-      ret = env;
-    }
-    return ret;
-  }
-
-  int max_queue_size_;
-  int debug_mode_;
-};
-
-class Runtime final {
-public:
-  static Runtime &getInstance() {
-    static Runtime instance;
-    return instance;
-  }
-
-  // modules
-  static hsa_status_t RegisterModuleFromMemory(
-      void *, size_t, hsa_agent_t agent,
-      hsa_status_t (*on_deserialized_data)(void *data, size_t size,
-                                           void *cb_state),
-      void *cb_state, std::vector<hsa_executable_t> &HSAExecutables);
-
-  // data
-  static hsa_status_t Memcpy(hsa_signal_t, void *, const void *, size_t);
-  static hsa_status_t Memfree(void *);
-  static hsa_status_t HostMalloc(void **ptr, size_t size,
-                                 hsa_amd_memory_pool_t MemoryPool);
-
-  int getMaxQueueSize() const { return env_.getMaxQueueSize(); }
-  int getDebugMode() const { return env_.getDebugMode(); }
-
-protected:
-  Runtime() = default;
-  ~Runtime() = default;
-  Runtime(const Runtime &) = delete;
-  Runtime &operator=(const Runtime &) = delete;
-
-protected:
-  // variable to track environment variables
-  Environment env_;
-};
+namespace Runtime {
+hsa_status_t Memfree(void *);
+hsa_status_t HostMalloc(void **ptr, size_t size,
+                        hsa_amd_memory_pool_t MemoryPool);
+
+} // namespace Runtime
+hsa_status_t RegisterModuleFromMemory(
+    std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
+    std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
+    void *module_bytes, size_t module_size, hsa_agent_t agent,
+    hsa_status_t (*on_deserialized_data)(void *data, size_t size,
+                                         void *cb_state),
+    void *cb_state, std::vector<hsa_executable_t> &HSAExecutables);
 
 } // namespace core
 

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
index 06a8d34c28e5f..fd10bef39099a 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -525,8 +525,8 @@ static hsa_status_t get_code_object_custom_metadata(
         size_t padding = new_offset - offset;
         offset = new_offset;
         info.arg_offsets.push_back(lcArg.offset_);
-        DEBUG_PRINT("Arg[%lu] \"%s\" (%u, %u)\n", i, lcArg.name_.c_str(),
-                    lcArg.size_, lcArg.offset_);
+        DP("Arg[%lu] \"%s\" (%u, %u)\n", i, lcArg.name_.c_str(), lcArg.size_,
+           lcArg.offset_);
         offset += lcArg.size_;
 
         // check if the arg is a hidden/implicit arg
@@ -541,13 +541,13 @@ static hsa_status_t get_code_object_custom_metadata(
     }
 
     // add size of implicit args, e.g.: offset x, y and z and pipe pointer, but
-    // in ATMI, do not count the compiler set implicit args, but set your own
-    // implicit args by discounting the compiler set implicit args
+    // do not count the compiler set implicit args, but set your own implicit
+    // args by discounting the compiler set implicit args
     info.kernel_segment_size =
         (hasHiddenArgs ? kernel_explicit_args_size : kernel_segment_size) +
         sizeof(impl_implicit_args_t);
-    DEBUG_PRINT("[%s: kernarg seg size] (%lu --> %u)\n", kernelName.c_str(),
-                kernel_segment_size, info.kernel_segment_size);
+    DP("[%s: kernarg seg size] (%lu --> %u)\n", kernelName.c_str(),
+       kernel_segment_size, info.kernel_segment_size);
 
     // kernel received, now add it to the kernel info table
     KernelInfoTable[kernelName] = info;
@@ -571,7 +571,7 @@ populate_InfoTables(hsa_executable_symbol_t symbol,
            "Symbol info extraction", get_error_string(err));
     return err;
   }
-  DEBUG_PRINT("Exec Symbol type: %d\n", type);
+  DP("Exec Symbol type: %d\n", type);
   if (type == HSA_SYMBOL_KIND_KERNEL) {
     err = hsa_executable_symbol_get_info(
         symbol, HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, &name_length);
@@ -636,11 +636,10 @@ populate_InfoTables(hsa_executable_symbol_t symbol,
       return err;
     }
 
-    DEBUG_PRINT(
-        "Kernel %s --> %lx symbol %u group segsize %u pvt segsize %u bytes "
-        "kernarg\n",
-        kernelName.c_str(), info.kernel_object, info.group_segment_size,
-        info.private_segment_size, info.kernel_segment_size);
+    DP("Kernel %s --> %lx symbol %u group segsize %u pvt segsize %u bytes "
+       "kernarg\n",
+       kernelName.c_str(), info.kernel_object, info.group_segment_size,
+       info.private_segment_size, info.kernel_segment_size);
 
     // assign it back to the kernel info table
     KernelInfoTable[kernelName] = info;
@@ -681,12 +680,11 @@ populate_InfoTables(hsa_executable_symbol_t symbol,
       return err;
     }
 
-    DEBUG_PRINT("Symbol %s = %p (%u bytes)\n", name, (void *)info.addr,
-                info.size);
+    DP("Symbol %s = %p (%u bytes)\n", name, (void *)info.addr, info.size);
     SymbolInfoTable[std::string(name)] = info;
     free(name);
   } else {
-    DEBUG_PRINT("Symbol is an indirect function\n");
+    DP("Symbol is an indirect function\n");
   }
   return HSA_STATUS_SUCCESS;
 }
@@ -730,9 +728,8 @@ hsa_status_t RegisterModuleFromMemory(
       err = get_code_object_custom_metadata(module_bytes, module_size,
                                             KernelInfoTable);
       if (err != HSA_STATUS_SUCCESS) {
-        DEBUG_PRINT("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-                    "Getting custom code object metadata",
-                    get_error_string(err));
+        DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
+           "Getting custom code object metadata", get_error_string(err));
         continue;
       }
 
@@ -741,8 +738,8 @@ hsa_status_t RegisterModuleFromMemory(
       err = hsa_code_object_deserialize(module_bytes, module_size, NULL,
                                         &code_object);
       if (err != HSA_STATUS_SUCCESS) {
-        DEBUG_PRINT("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-                    "Code Object Deserialization", get_error_string(err));
+        DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
+           "Code Object Deserialization", get_error_string(err));
         continue;
       }
       assert(0 != code_object.handle);
@@ -763,8 +760,8 @@ hsa_status_t RegisterModuleFromMemory(
       err =
           hsa_executable_load_code_object(executable, agent, code_object, NULL);
       if (err != HSA_STATUS_SUCCESS) {
-        DEBUG_PRINT("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
-                    "Loading the code object", get_error_string(err));
+        DP("[%s:%d] %s failed: %s\n", __FILE__, __LINE__,
+           "Loading the code object", get_error_string(err));
         continue;
       }
 
@@ -772,7 +769,7 @@ hsa_status_t RegisterModuleFromMemory(
     }
     module_load_success = true;
   } while (0);
-  DEBUG_PRINT("Modules loaded successful? %d\n", module_load_success);
+  DP("Modules loaded successful? %d\n", module_load_success);
   if (module_load_success) {
     /* Freeze the executable; it can now be queried for symbols.  */
     err = hsa_executable_freeze(executable, "");

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp b/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp
deleted file mode 100644
index a0a9d416eb978..0000000000000
--- a/openmp/libomptarget/plugins/amdgpu/impl/utils.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-//===--- amdgpu/impl/utils.cpp ------------------------------------ C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#include "internal.h"
-#include "rt.h"
-
-#include <stdio.h>
-#include <string>
-
-const char *get_error_string(hsa_status_t err) {
-  const char *res;
-  hsa_status_t rc = hsa_status_string(err, &res);
-  return (rc == HSA_STATUS_SUCCESS) ? res : "HSA_STATUS UNKNOWN.";
-}
-
-namespace core {
-/*
- * Environment variables
- */
-void Environment::GetEnvAll() {
-  std::string var = GetEnv("ATMI_HELP");
-  if (!var.empty()) {
-    printf("ATMI_MAX_HSA_QUEUE_SIZE : positive integer\n"
-           "ATMI_DEBUG : 1 for printing out trace/debug info\n");
-  }
-
-  var = GetEnv("ATMI_MAX_HSA_QUEUE_SIZE");
-  if (!var.empty())
-    max_queue_size_ = std::stoi(var);
-
-  var = GetEnv("ATMI_DEBUG");
-  if (!var.empty())
-    debug_mode_ = std::stoi(var);
-}
-} // namespace core

diff  --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 2b131a2784925..bbf73b1c3a3c1 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -28,18 +28,14 @@
 #include "impl_runtime.h"
 
 #include "internal.h"
+#include "rt.h"
 
-#include "Debug.h"
 #include "get_elf_mach_gfx_name.h"
 #include "omptargetplugin.h"
 #include "print_tracing.h"
 
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 
-#ifndef TARGET_NAME
-#error "Missing TARGET_NAME macro"
-#endif
-#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
 
 // hostrpc interface, FIXME: consider moving to its own include these are
 // statically linked into amdgpu/plugin if present from hostrpc_services.a,
@@ -84,16 +80,6 @@ int print_kernel_trace;
 
 #include "elf_common.h"
 
-namespace core {
-hsa_status_t RegisterModuleFromMemory(
-    std::map<std::string, atl_kernel_info_t> &KernelInfo,
-    std::map<std::string, atl_symbol_info_t> &SymbolInfoTable, void *, size_t,
-    hsa_agent_t agent,
-    hsa_status_t (*on_deserialized_data)(void *data, size_t size,
-                                         void *cb_state),
-    void *cb_state, std::vector<hsa_executable_t> &HSAExecutables);
-}
-
 namespace hsa {
 template <typename C> hsa_status_t iterate_agents(C cb) {
   auto L = [](hsa_agent_t agent, void *data) -> hsa_status_t {
@@ -713,7 +699,7 @@ class RTLDeviceInfoTy {
       return;
     }
 
-    // Init hostcall soon after initializing ATMI
+    // Init hostcall soon after initializing hsa
     hostrpc_init();
 
     err = FindAgents([&](hsa_device_type_t DeviceType, hsa_agent_t Agent) {
@@ -775,8 +761,9 @@ class RTLDeviceInfoTy {
           DP("HSA query QUEUE_MAX_SIZE failed for agent %d\n", i);
           return;
         }
-        if (queue_size > core::Runtime::getInstance().getMaxQueueSize()) {
-          queue_size = core::Runtime::getInstance().getMaxQueueSize();
+        enum { MaxQueueSize = 4096 };
+        if (queue_size > MaxQueueSize) {
+          queue_size = MaxQueueSize;
         }
       }
 
@@ -819,7 +806,7 @@ class RTLDeviceInfoTy {
     // impl_finalize removes access to it
     deviceStateStore.clear();
     KernelArgPoolMap.clear();
-    // Terminate hostrpc before finalizing ATMI
+    // Terminate hostrpc before finalizing hsa
     hostrpc_terminate();
 
     hsa_status_t Err;
@@ -1530,7 +1517,7 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
     }
   }
 
-  DP("ATMI module successfully loaded!\n");
+  DP("AMDGPU module successfully loaded!\n");
 
   {
     // the device_State array is either large value in bss or a void* that
@@ -2197,8 +2184,7 @@ int32_t __tgt_rtl_run_target_team_region_locked(
         memcpy((char *)kernarg + sizeof(void *) * i, args[i], sizeof(void *));
       }
 
-      // Initialize implicit arguments. ATMI seems to leave most fields
-      // uninitialized
+      // Initialize implicit arguments. TODO: Which of these can be dropped
       impl_implicit_args_t *impl_args =
           reinterpret_cast<impl_implicit_args_t *>(
               static_cast<char *>(kernarg) + ArgPool->kernarg_segment_size);


        


More information about the Openmp-commits mailing list