[llvm] cfc76b6 - [llvm][offload] Move AMDGPU offload utilities to LLVM (#102487)

via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 20 06:03:12 PDT 2024


Author: Fabian Mora
Date: 2024-08-20T09:03:06-04:00
New Revision: cfc76b6498a3c03268b3f319249afa4512d68e1c

URL: https://github.com/llvm/llvm-project/commit/cfc76b6498a3c03268b3f319249afa4512d68e1c
DIFF: https://github.com/llvm/llvm-project/commit/cfc76b6498a3c03268b3f319249afa4512d68e1c.diff

LOG: [llvm][offload] Move AMDGPU offload utilities to LLVM (#102487)

This patch moves utilities from
`offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h` to
`llvm/Frontend/Offloading/Utility.h` to be reused by
other projects.

Concretely the following changes were made:
- Rename `KernelMetaDataTy` to `AMDGPUKernelMetaData`.
- Remove unused fields `KernelObject`, `KernelSegmentSize`,
`ExplicitArgumentCount` and `ImplicitArgumentCount` from
`AMDGPUKernelMetaData`.
- Return the produced error if `ELFObj.sections()` failed instead of
using `cantFail`.
- Added `AGPRCount` field to `AMDGPUKernelMetaData`.
- Added a default invalid value to all the fields in
`AMDGPUKernelMetaData`.

Added: 
    

Modified: 
    llvm/include/llvm/Frontend/Offloading/Utility.h
    llvm/lib/Frontend/Offloading/CMakeLists.txt
    llvm/lib/Frontend/Offloading/Utility.cpp
    offload/plugins-nextgen/amdgpu/CMakeLists.txt
    offload/plugins-nextgen/amdgpu/src/rtl.cpp
    offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Frontend/Offloading/Utility.h b/llvm/include/llvm/Frontend/Offloading/Utility.h
index f54dd7ba7ab45f..abaea843848b21 100644
--- a/llvm/include/llvm/Frontend/Offloading/Utility.h
+++ b/llvm/include/llvm/Frontend/Offloading/Utility.h
@@ -9,8 +9,14 @@
 #ifndef LLVM_FRONTEND_OFFLOADING_UTILITY_H
 #define LLVM_FRONTEND_OFFLOADING_UTILITY_H
 
+#include <cstdint>
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Object/OffloadBinary.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBufferRef.h"
 
 namespace llvm {
 namespace offloading {
@@ -73,6 +79,60 @@ getOffloadingEntryInitializer(Module &M, Constant *Addr, StringRef Name,
 std::pair<GlobalVariable *, GlobalVariable *>
 getOffloadEntryArray(Module &M, StringRef SectionName);
 
+namespace amdgpu {
+/// Check if an image is compatible with current system's environment. The
+/// system environment is given as a 'target-id' which has the form:
+///
+/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
+///
+/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
+/// and is compatible with either '+' or '-'. The HSA runtime returns this
+/// information using the target-id, while we use the ELF header to determine
+/// these features.
+bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
+                              StringRef EnvTargetID);
+
+/// Struct for holding metadata related to AMDGPU kernels, for more information
+/// about the metadata and its meaning see:
+/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3
+struct AMDGPUKernelMetaData {
+  /// Constant indicating that a value is invalid.
+  static constexpr uint32_t KInvalidValue =
+      std::numeric_limits<uint32_t>::max();
+  /// The amount of group segment memory required by a work-group in bytes.
+  uint32_t GroupSegmentList = KInvalidValue;
+  /// The amount of fixed private address space memory required for a work-item
+  /// in bytes.
+  uint32_t PrivateSegmentSize = KInvalidValue;
+  /// Number of scalar registers required by a wavefront.
+  uint32_t SGPRCount = KInvalidValue;
+  /// Number of vector registers required by each work-item.
+  uint32_t VGPRCount = KInvalidValue;
+  /// Number of stores from a scalar register to a register allocator created
+  /// spill location.
+  uint32_t SGPRSpillCount = KInvalidValue;
+  /// Number of stores from a vector register to a register allocator created
+  /// spill location.
+  uint32_t VGPRSpillCount = KInvalidValue;
+  /// Number of accumulator registers required by each work-item.
+  uint32_t AGPRCount = KInvalidValue;
+  /// Corresponds to the OpenCL reqd_work_group_size attribute.
+  uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue,
+                                        KInvalidValue};
+  /// Corresponds to the OpenCL work_group_size_hint attribute.
+  uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue};
+  /// Wavefront size.
+  uint32_t WavefrontSize = KInvalidValue;
+  /// Maximum flat work-group size supported by the kernel in work-items.
+  uint32_t MaxFlatWorkgroupSize = KInvalidValue;
+};
+
+/// Reads AMDGPU specific metadata from the ELF file and propagates the
+/// KernelInfoMap.
+Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
+                                 StringMap<AMDGPUKernelMetaData> &KernelInfoMap,
+                                 uint16_t &ELFABIVersion);
+} // namespace amdgpu
 } // namespace offloading
 } // namespace llvm
 

diff  --git a/llvm/lib/Frontend/Offloading/CMakeLists.txt b/llvm/lib/Frontend/Offloading/CMakeLists.txt
index 16e0dcfa0e90d6..ce445ad9cc4cb6 100644
--- a/llvm/lib/Frontend/Offloading/CMakeLists.txt
+++ b/llvm/lib/Frontend/Offloading/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_component_library(LLVMFrontendOffloading
   LINK_COMPONENTS
   Core
   BinaryFormat
+  Object
   Support
   TransformUtils
   TargetParser

diff  --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp
index 919b9462e32d48..010c0bfd3be76b 100644
--- a/llvm/lib/Frontend/Offloading/Utility.cpp
+++ b/llvm/lib/Frontend/Offloading/Utility.cpp
@@ -7,10 +7,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Frontend/Offloading/Utility.h"
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Value.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/YAMLTraits.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
 using namespace llvm;
@@ -126,3 +132,229 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
 
   return std::make_pair(EntriesB, EntriesE);
 }
+
+bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
+                                                        uint32_t ImageFlags,
+                                                        StringRef EnvTargetID) {
+  using namespace llvm::ELF;
+  StringRef EnvArch = EnvTargetID.split(":").first;
+
+  // Trivial check if the base processors match.
+  if (EnvArch != ImageArch)
+    return false;
+
+  // Check if the image is requesting xnack on or off.
+  switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
+  case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
+    // The image is 'xnack-' so the environment must be 'xnack-'.
+    if (!EnvTargetID.contains("xnack-"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_XNACK_ON_V4:
+    // The image is 'xnack+' so the environment must be 'xnack+'.
+    if (!EnvTargetID.contains("xnack+"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
+  case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
+  default:
+    break;
+  }
+
+  // Check if the image is requesting sramecc on or off.
+  switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
+  case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
+    // The image is 'sramecc-' so the environment must be 'sramecc-'.
+    if (!EnvTargetID.contains("sramecc-"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
+    // The image is 'sramecc+' so the environment must be 'sramecc+'.
+    if (!EnvTargetID.contains("sramecc+"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
+  case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
+    break;
+  }
+
+  return true;
+}
+
+namespace {
+/// Reads the AMDGPU specific per-kernel-metadata from an image.
+class KernelInfoReader {
+public:
+  KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
+      : KernelInfoMap(KIM) {}
+
+  /// Process ELF note to read AMDGPU metadata from respective information
+  /// fields.
+  Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
+    if (Note.getName() != "AMDGPU")
+      return Error::success(); // We are not interested in other things
+
+    assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
+           "Parse AMDGPU MetaData");
+    auto Desc = Note.getDesc(Align);
+    StringRef MsgPackString =
+        StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
+    msgpack::Document MsgPackDoc;
+    if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
+      return Error::success();
+
+    AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
+    if (!Verifier.verify(MsgPackDoc.getRoot()))
+      return Error::success();
+
+    auto RootMap = MsgPackDoc.getRoot().getMap(true);
+
+    if (auto Err = iterateAMDKernels(RootMap))
+      return Err;
+
+    return Error::success();
+  }
+
+private:
+  /// Extracts the relevant information via simple string look-up in the msgpack
+  /// document elements.
+  Error
+  extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
+                    std::string &KernelName,
+                    offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
+    if (!V.first.isString())
+      return Error::success();
+
+    const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
+      return DK.getString() == SK;
+    };
+
+    const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
+                                           uint32_t *Vals) {
+      assert(DN.isArray() && "MsgPack DocNode is an array node");
+      auto DNA = DN.getArray();
+      assert(DNA.size() == 3 && "ArrayNode has at most three elements");
+
+      int I = 0;
+      for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
+           ++DNABegin) {
+        Vals[I++] = DNABegin->getUInt();
+      }
+    };
+
+    if (IsKey(V.first, ".name")) {
+      KernelName = V.second.toString();
+    } else if (IsKey(V.first, ".sgpr_count")) {
+      KernelData.SGPRCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".sgpr_spill_count")) {
+      KernelData.SGPRSpillCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".vgpr_count")) {
+      KernelData.VGPRCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".vgpr_spill_count")) {
+      KernelData.VGPRSpillCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".agpr_count")) {
+      KernelData.AGPRCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".private_segment_fixed_size")) {
+      KernelData.PrivateSegmentSize = V.second.getUInt();
+    } else if (IsKey(V.first, ".group_segment_fixed_size")) {
+      KernelData.GroupSegmentList = V.second.getUInt();
+    } else if (IsKey(V.first, ".reqd_workgroup_size")) {
+      GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
+    } else if (IsKey(V.first, ".workgroup_size_hint")) {
+      GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
+    } else if (IsKey(V.first, ".wavefront_size")) {
+      KernelData.WavefrontSize = V.second.getUInt();
+    } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
+      KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
+    }
+
+    return Error::success();
+  }
+
+  /// Get the "amdhsa.kernels" element from the msgpack Document
+  Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
+    auto Res = MDN.find("amdhsa.kernels");
+    if (Res == MDN.end())
+      return createStringError(inconvertibleErrorCode(),
+                               "Could not find amdhsa.kernels key");
+
+    auto Pair = *Res;
+    assert(Pair.second.isArray() &&
+           "AMDGPU kernel entries are arrays of entries");
+
+    return Pair.second.getArray();
+  }
+
+  /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
+  /// MapDocNode that either maps a string to a single value (most of them) or
+  /// to another array of things. Currently, we only handle the case that maps
+  /// to scalar value.
+  Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
+    offloading::amdgpu::AMDGPUKernelMetaData KernelData;
+    std::string KernelName;
+    auto Entry = (*It).getMap();
+    for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
+      if (auto Err = extractKernelData(*MI, KernelName, KernelData))
+        return Err;
+
+    KernelInfoMap.insert({KernelName, KernelData});
+    return Error::success();
+  }
+
+  /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
+  Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
+    auto KernelsOrErr = getAMDKernelsArray(MDN);
+    if (auto Err = KernelsOrErr.takeError())
+      return Err;
+
+    auto KernelsArr = *KernelsOrErr;
+    for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
+      if (!It->isMap())
+        continue; // we expect <key,value> pairs
+
+      // Obtain the value for the 
diff erent entries. Each array entry is a
+      // MapDocNode
+      if (auto Err = generateKernelInfo(It))
+        return Err;
+    }
+    return Error::success();
+  }
+
+  // Kernel names are the keys
+  StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
+};
+} // namespace
+
+Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
+    MemoryBufferRef MemBuffer,
+    StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
+    uint16_t &ELFABIVersion) {
+  Error Err = Error::success(); // Used later as out-parameter
+
+  auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
+  if (auto Err = ELFOrError.takeError())
+    return Err;
+
+  const object::ELF64LEFile ELFObj = ELFOrError.get();
+  Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
+  if (!Sections)
+    return Sections.takeError();
+  KernelInfoReader Reader(KernelInfoMap);
+
+  // Read the code object version from ELF image header
+  auto Header = ELFObj.getHeader();
+  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+  for (const auto &S : *Sections) {
+    if (S.sh_type != ELF::SHT_NOTE)
+      continue;
+
+    for (const auto N : ELFObj.notes(S, Err)) {
+      if (Err)
+        return Err;
+      // Fills the KernelInfoTabel entries in the reader
+      if ((Err = Reader.processNote(N, S.sh_addralign)))
+        return Err;
+    }
+  }
+  return Error::success();
+}

diff  --git a/offload/plugins-nextgen/amdgpu/CMakeLists.txt b/offload/plugins-nextgen/amdgpu/CMakeLists.txt
index 47cd2feefc7288..b40c62d43226f4 100644
--- a/offload/plugins-nextgen/amdgpu/CMakeLists.txt
+++ b/offload/plugins-nextgen/amdgpu/CMakeLists.txt
@@ -10,11 +10,12 @@ target_include_directories(omptarget.rtl.amdgpu PRIVATE
 
 if(hsa-runtime64_FOUND AND NOT "amdgpu" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS)
   message(STATUS "Building AMDGPU plugin linked against libhsa")
-  target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64)
+  target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloading)
 else()
   message(STATUS "Building AMDGPU plugin for dlopened libhsa")
   target_include_directories(omptarget.rtl.amdgpu PRIVATE dynamic_hsa)
   target_sources(omptarget.rtl.amdgpu PRIVATE dynamic_hsa/hsa.cpp)
+  target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloading)
 endif()
 
 # Configure testing for the AMDGPU plugin. We will build tests if we could a

diff  --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 604683370cd27d..a434a0089d5f94 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -485,7 +485,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
   findDeviceSymbol(GenericDeviceTy &Device, StringRef SymbolName) const;
 
   /// Get additional info for kernel, e.g., register spill counts
-  std::optional<utils::KernelMetaDataTy>
+  std::optional<offloading::amdgpu::AMDGPUKernelMetaData>
   getKernelInfo(StringRef Identifier) const {
     auto It = KernelInfoMap.find(Identifier);
 
@@ -499,7 +499,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
   /// The exectuable loaded on the agent.
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
-  StringMap<utils::KernelMetaDataTy> KernelInfoMap;
+  StringMap<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfoMap;
   uint16_t ELFABIVersion;
 };
 
@@ -600,7 +600,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
   uint32_t ImplicitArgsSize;
 
   /// Additional Info for the AMD GPU Kernel
-  std::optional<utils::KernelMetaDataTy> KernelInfo;
+  std::optional<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfo;
 };
 
 /// Class representing an HSA signal. Signals are used to define dependencies
@@ -3188,9 +3188,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
         utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
     if (!TargeTripleAndFeaturesOrError)
       return TargeTripleAndFeaturesOrError.takeError();
-    return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
-                                           ElfOrErr->getPlatformFlags(),
-                                           *TargeTripleAndFeaturesOrError);
+    return offloading::amdgpu::isImageCompatibleWithEnv(
+        Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
+        *TargeTripleAndFeaturesOrError);
   }
 
   bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {

diff  --git a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
index 58a3b5df00fac6..0b6bc50ebf1d89 100644
--- a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -17,24 +17,13 @@
 
 #include "omptarget.h"
 
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Error.h"
-
-#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/BinaryFormat/MsgPackDocument.h"
-#include "llvm/Support/MemoryBufferRef.h"
-#include "llvm/Support/YAMLTraits.h"
-
-using namespace llvm::ELF;
+#include "llvm/Frontend/Offloading/Utility.h"
 
 namespace llvm {
 namespace omp {
 namespace target {
 namespace plugin {
 namespace utils {
-
 // The implicit arguments of COV5 AMDGPU kernels.
 struct AMDGPUImplicitArgsTy {
   uint32_t BlockCountX;
@@ -55,259 +44,26 @@ struct AMDGPUImplicitArgsTyCOV4 {
   uint8_t Unused[56];
 };
 
+/// Returns the size in bytes of the implicit arguments of AMDGPU kernels.
+/// `Version` is the ELF ABI version, e.g. COV5.
 inline uint32_t getImplicitArgsSize(uint16_t Version) {
   return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
              ? sizeof(AMDGPUImplicitArgsTyCOV4)
              : sizeof(AMDGPUImplicitArgsTy);
 }
 
-/// Check if an image is compatible with current system's environment. The
-/// system environment is given as a 'target-id' which has the form:
-///
-/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
-///
-/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
-/// and is compatible with either '+' or '-'. The HSA runtime returns this
-/// information using the target-id, while we use the ELF header to determine
-/// these features.
-inline bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
-                                     StringRef EnvTargetID) {
-  StringRef EnvArch = EnvTargetID.split(":").first;
-
-  // Trivial check if the base processors match.
-  if (EnvArch != ImageArch)
-    return false;
-
-  // Check if the image is requesting xnack on or off.
-  switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
-  case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
-    // The image is 'xnack-' so the environment must be 'xnack-'.
-    if (!EnvTargetID.contains("xnack-"))
-      return false;
-    break;
-  case EF_AMDGPU_FEATURE_XNACK_ON_V4:
-    // The image is 'xnack+' so the environment must be 'xnack+'.
-    if (!EnvTargetID.contains("xnack+"))
-      return false;
-    break;
-  case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
-  case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
-  default:
-    break;
-  }
-
-  // Check if the image is requesting sramecc on or off.
-  switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
-  case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
-    // The image is 'sramecc-' so the environment must be 'sramecc-'.
-    if (!EnvTargetID.contains("sramecc-"))
-      return false;
-    break;
-  case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
-    // The image is 'sramecc+' so the environment must be 'sramecc+'.
-    if (!EnvTargetID.contains("sramecc+"))
-      return false;
-    break;
-  case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
-  case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
-    break;
-  }
-
-  return true;
-}
-
-struct KernelMetaDataTy {
-  uint64_t KernelObject;
-  uint32_t GroupSegmentList;
-  uint32_t PrivateSegmentSize;
-  uint32_t SGPRCount;
-  uint32_t VGPRCount;
-  uint32_t SGPRSpillCount;
-  uint32_t VGPRSpillCount;
-  uint32_t KernelSegmentSize;
-  uint32_t ExplicitArgumentCount;
-  uint32_t ImplicitArgumentCount;
-  uint32_t RequestedWorkgroupSize[3];
-  uint32_t WorkgroupSizeHint[3];
-  uint32_t WavefronSize;
-  uint32_t MaxFlatWorkgroupSize;
-};
-namespace {
-
-/// Reads the AMDGPU specific per-kernel-metadata from an image.
-class KernelInfoReader {
-public:
-  KernelInfoReader(StringMap<KernelMetaDataTy> &KIM) : KernelInfoMap(KIM) {}
-
-  /// Process ELF note to read AMDGPU metadata from respective information
-  /// fields.
-  Error processNote(const object::ELF64LE::Note &Note, size_t Align) {
-    if (Note.getName() != "AMDGPU")
-      return Error::success(); // We are not interested in other things
-
-    assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
-           "Parse AMDGPU MetaData");
-    auto Desc = Note.getDesc(Align);
-    StringRef MsgPackString =
-        StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
-    msgpack::Document MsgPackDoc;
-    if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
-      return Error::success();
-
-    AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
-    if (!Verifier.verify(MsgPackDoc.getRoot()))
-      return Error::success();
-
-    auto RootMap = MsgPackDoc.getRoot().getMap(true);
-
-    if (auto Err = iterateAMDKernels(RootMap))
-      return Err;
-
-    return Error::success();
-  }
-
-private:
-  /// Extracts the relevant information via simple string look-up in the msgpack
-  /// document elements.
-  Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
-                          std::string &KernelName,
-                          KernelMetaDataTy &KernelData) {
-    if (!V.first.isString())
-      return Error::success();
-
-    const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
-      return DK.getString() == SK;
-    };
-
-    const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
-                                           uint32_t *Vals) {
-      assert(DN.isArray() && "MsgPack DocNode is an array node");
-      auto DNA = DN.getArray();
-      assert(DNA.size() == 3 && "ArrayNode has at most three elements");
-
-      int I = 0;
-      for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
-           ++DNABegin) {
-        Vals[I++] = DNABegin->getUInt();
-      }
-    };
-
-    if (IsKey(V.first, ".name")) {
-      KernelName = V.second.toString();
-    } else if (IsKey(V.first, ".sgpr_count")) {
-      KernelData.SGPRCount = V.second.getUInt();
-    } else if (IsKey(V.first, ".sgpr_spill_count")) {
-      KernelData.SGPRSpillCount = V.second.getUInt();
-    } else if (IsKey(V.first, ".vgpr_count")) {
-      KernelData.VGPRCount = V.second.getUInt();
-    } else if (IsKey(V.first, ".vgpr_spill_count")) {
-      KernelData.VGPRSpillCount = V.second.getUInt();
-    } else if (IsKey(V.first, ".private_segment_fixed_size")) {
-      KernelData.PrivateSegmentSize = V.second.getUInt();
-    } else if (IsKey(V.first, ".group_segment_fixed_size")) {
-      KernelData.GroupSegmentList = V.second.getUInt();
-    } else if (IsKey(V.first, ".reqd_workgroup_size")) {
-      GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
-    } else if (IsKey(V.first, ".workgroup_size_hint")) {
-      GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
-    } else if (IsKey(V.first, ".wavefront_size")) {
-      KernelData.WavefronSize = V.second.getUInt();
-    } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
-      KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
-    }
-
-    return Error::success();
-  }
-
-  /// Get the "amdhsa.kernels" element from the msgpack Document
-  Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
-    auto Res = MDN.find("amdhsa.kernels");
-    if (Res == MDN.end())
-      return createStringError(inconvertibleErrorCode(),
-                               "Could not find amdhsa.kernels key");
-
-    auto Pair = *Res;
-    assert(Pair.second.isArray() &&
-           "AMDGPU kernel entries are arrays of entries");
-
-    return Pair.second.getArray();
-  }
-
-  /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
-  /// MapDocNode that either maps a string to a single value (most of them) or
-  /// to another array of things. Currently, we only handle the case that maps
-  /// to scalar value.
-  Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
-    KernelMetaDataTy KernelData;
-    std::string KernelName;
-    auto Entry = (*It).getMap();
-    for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
-      if (auto Err = extractKernelData(*MI, KernelName, KernelData))
-        return Err;
-
-    KernelInfoMap.insert({KernelName, KernelData});
-    return Error::success();
-  }
-
-  /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
-  Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
-    auto KernelsOrErr = getAMDKernelsArray(MDN);
-    if (auto Err = KernelsOrErr.takeError())
-      return Err;
-
-    auto KernelsArr = *KernelsOrErr;
-    for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
-      if (!It->isMap())
-        continue; // we expect <key,value> pairs
-
-      // Obtain the value for the 
diff erent entries. Each array entry is a
-      // MapDocNode
-      if (auto Err = generateKernelInfo(It))
-        return Err;
-    }
-    return Error::success();
-  }
-
-  // Kernel names are the keys
-  StringMap<KernelMetaDataTy> &KernelInfoMap;
-};
-} // namespace
-
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
 /// KernelInfoMap
-inline Error
-readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
-                            StringMap<KernelMetaDataTy> &KernelInfoMap,
-                            uint16_t &ELFABIVersion) {
-  Error Err = Error::success(); // Used later as out-parameter
-
-  auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
-  if (auto Err = ELFOrError.takeError())
+inline Error readAMDGPUMetaDataFromImage(
+    MemoryBufferRef MemBuffer,
+    StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
+    uint16_t &ELFABIVersion) {
+  Error Err = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
+      MemBuffer, KernelInfoMap, ELFABIVersion);
+  if (!Err)
     return Err;
-
-  const object::ELF64LEFile ELFObj = ELFOrError.get();
-  ArrayRef<object::ELF64LE::Shdr> Sections = cantFail(ELFObj.sections());
-  KernelInfoReader Reader(KernelInfoMap);
-
-  // Read the code object version from ELF image header
-  auto Header = ELFObj.getHeader();
-  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
   DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
-
-  for (const auto &S : Sections) {
-    if (S.sh_type != ELF::SHT_NOTE)
-      continue;
-
-    for (const auto N : ELFObj.notes(S, Err)) {
-      if (Err)
-        return Err;
-      // Fills the KernelInfoTabel entries in the reader
-      if ((Err = Reader.processNote(N, S.sh_addralign)))
-        return Err;
-    }
-  }
-
-  return Error::success();
+  return Err;
 }
 
 } // namespace utils


        


More information about the llvm-commits mailing list