[Openmp-commits] [openmp] 9cdaf0b - [openmp][amdgpu][nfc] Inline interop_hsa_get_kernel_info into only caller

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Fri Nov 19 10:45:26 PST 2021


Author: Jon Chesterfield
Date: 2021-11-19T18:45:17Z
New Revision: 9cdaf0b01b298dc05a213fca5943e23a2aec51a2

URL: https://github.com/llvm/llvm-project/commit/9cdaf0b01b298dc05a213fca5943e23a2aec51a2
DIFF: https://github.com/llvm/llvm-project/commit/9cdaf0b01b298dc05a213fca5943e23a2aec51a2.diff

LOG: [openmp][amdgpu][nfc] Inline interop_hsa_get_kernel_info into only caller

Added: 
    

Modified: 
    openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.cpp
    openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.h
    openmp/libomptarget/plugins/amdgpu/impl/system.cpp
    openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.cpp b/openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.cpp
index af3e392ad4a9..f10057d98d21 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.cpp
@@ -37,48 +37,3 @@ hsa_status_t interop_hsa_get_symbol_info(
     return HSA_STATUS_ERROR;
   }
 }
-
-hsa_status_t interop_hsa_get_kernel_info(
-    const std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
-    int DeviceId, const char *kernel_name,
-    hsa_executable_symbol_info_t kernel_info, uint32_t *value) {
-  /*
-     // Typical usage:
-     uint32_t value;
-     interop_hsa_get_kernel_addr(gpu_place, "kernel_name",
-                                  HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
-                                  &val);
-  */
-
-  if (!kernel_name || !value)
-    return HSA_STATUS_ERROR;
-
-  hsa_status_t status = HSA_STATUS_SUCCESS;
-  // get the kernel info
-  std::string kernelStr = std::string(kernel_name);
-  auto It = KernelInfoTable.find(kernelStr);
-  if (It != KernelInfoTable.end()) {
-    atl_kernel_info_t info = It->second;
-    switch (kernel_info) {
-    case HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_GROUP_SEGMENT_SIZE:
-      *value = info.group_segment_size;
-      break;
-    case HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_PRIVATE_SEGMENT_SIZE:
-      *value = info.private_segment_size;
-      break;
-    case HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE:
-      // return the size for non-implicit args
-      *value = info.kernel_segment_size - sizeof(impl_implicit_args_t);
-      break;
-    default:
-      *value = 0;
-      status = HSA_STATUS_ERROR;
-      break;
-    }
-  } else {
-    *value = 0;
-    status = HSA_STATUS_ERROR;
-  }
-
-  return status;
-}

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.h b/openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.h
index 86fa49ef6351..488a99e23c38 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.h
+++ b/openmp/libomptarget/plugins/amdgpu/impl/interop_hsa.h
@@ -21,11 +21,6 @@ hsa_status_t interop_hsa_get_symbol_info(
     const std::map<std::string, atl_symbol_info_t> &SymbolInfoTable,
     int DeviceId, const char *symbol, void **var_addr, unsigned int *var_size);
 
-hsa_status_t interop_hsa_get_kernel_info(
-    const std::map<std::string, atl_kernel_info_t> &KernelInfoTable,
-    int DeviceId, const char *kernel_name, hsa_executable_symbol_info_t info,
-    uint32_t *value);
-
 }
 
 #endif // INCLUDE_INTEROP_HSA_H_

diff  --git a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
index da2ab765e8ab..9151da95d6e4 100644
--- a/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/impl/system.cpp
@@ -46,22 +46,6 @@ typedef struct {
 } Elf_Note;
 #endif
 
-// The following include file and following structs/enums
-// have been replicated on a per-use basis below. For example,
-// llvm::AMDGPU::HSAMD::Kernel::Metadata has several fields,
-// but we may care only about kernargSegmentSize_ for now, so
-// we just include that field in our KernelMD implementation. We
-// chose this approach to replicate in order to avoid forcing
-// a dependency on LLVM_INCLUDE_DIR just to compile the runtime.
-// #include "llvm/Support/AMDGPUMetadata.h"
-// typedef llvm::AMDGPU::HSAMD::Metadata CodeObjectMD;
-// typedef llvm::AMDGPU::HSAMD::Kernel::Metadata KernelMD;
-// typedef llvm::AMDGPU::HSAMD::Kernel::Arg::Metadata KernelArgMD;
-// using llvm::AMDGPU::HSAMD::AccessQualifier;
-// using llvm::AMDGPU::HSAMD::AddressSpaceQualifier;
-// using llvm::AMDGPU::HSAMD::ValueKind;
-// using llvm::AMDGPU::HSAMD::ValueType;
-
 class KernelArgMD {
 public:
   enum class ValueKind {
@@ -99,24 +83,6 @@ class KernelMD {
 };
 
 static const std::map<std::string, KernelArgMD::ValueKind> ArgValueKind = {
-    //    Including only those fields that are relevant to the runtime.
-    //    {"ByValue", KernelArgMD::ValueKind::ByValue},
-    //    {"GlobalBuffer", KernelArgMD::ValueKind::GlobalBuffer},
-    //    {"DynamicSharedPointer",
-    //    KernelArgMD::ValueKind::DynamicSharedPointer},
-    //    {"Sampler", KernelArgMD::ValueKind::Sampler},
-    //    {"Image", KernelArgMD::ValueKind::Image},
-    //    {"Pipe", KernelArgMD::ValueKind::Pipe},
-    //    {"Queue", KernelArgMD::ValueKind::Queue},
-    {"HiddenGlobalOffsetX", KernelArgMD::ValueKind::HiddenGlobalOffsetX},
-    {"HiddenGlobalOffsetY", KernelArgMD::ValueKind::HiddenGlobalOffsetY},
-    {"HiddenGlobalOffsetZ", KernelArgMD::ValueKind::HiddenGlobalOffsetZ},
-    {"HiddenNone", KernelArgMD::ValueKind::HiddenNone},
-    {"HiddenPrintfBuffer", KernelArgMD::ValueKind::HiddenPrintfBuffer},
-    {"HiddenDefaultQueue", KernelArgMD::ValueKind::HiddenDefaultQueue},
-    {"HiddenCompletionAction", KernelArgMD::ValueKind::HiddenCompletionAction},
-    {"HiddenMultiGridSyncArg", KernelArgMD::ValueKind::HiddenMultiGridSyncArg},
-    {"HiddenHostcallBuffer", KernelArgMD::ValueKind::HiddenHostcallBuffer},
     // v3
     //    {"by_value", KernelArgMD::ValueKind::ByValue},
     //    {"global_buffer", KernelArgMD::ValueKind::GlobalBuffer},

diff  --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index cd9f02902be1..906b856c6311 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -1620,12 +1620,24 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
 
     DP("to find the kernel name: %s size: %lu\n", e->name, strlen(e->name));
 
-    uint32_t kernarg_segment_size;
+    // errors in kernarg_segment_size previously treated as = 0 (or as undef)
+    uint32_t kernarg_segment_size = 0;
     auto &KernelInfoMap = DeviceInfo.KernelInfoTable[device_id];
-    hsa_status_t err = interop_hsa_get_kernel_info(
-        KernelInfoMap, device_id, e->name,
-        HSA_EXECUTABLE_SYMBOL_INFO_KERNEL_KERNARG_SEGMENT_SIZE,
-        &kernarg_segment_size);
+    hsa_status_t err = HSA_STATUS_SUCCESS;
+    if (!e->name) {
+      err = HSA_STATUS_ERROR;
+    } else {
+      std::string kernelStr = std::string(e->name);
+      auto It = KernelInfoMap.find(kernelStr);
+      if (It != KernelInfoMap.end()) {
+        atl_kernel_info_t info = It->second;
+        // return the size for non-implicit args
+        kernarg_segment_size =
+            info.kernel_segment_size - sizeof(impl_implicit_args_t);
+      } else {
+        err = HSA_STATUS_ERROR;
+      }
+    }
 
     // each arg is a void * in this openmp implementation
     uint32_t arg_num = kernarg_segment_size / sizeof(void *);


        


More information about the Openmp-commits mailing list