[llvm] [llvm][offload] Move AMDGPU offload utilities to LLVM (PR #102487)

Mon Aug 19 09:10:06 PDT 2024

https://github.com/fabianmcg updated https://github.com/llvm/llvm-project/pull/102487

>From 4ce897aa25654272dc33036fe81c626c012c8c6a Mon Sep 17 00:00:00 2001
From: Fabian Mora <fmora.dev at gmail.com>
Date: Thu, 8 Aug 2024 14:09:52 +0000
Subject: [PATCH 1/2] [llvm][offload] Move AMDGPU offload utilities to LLVM

This patch moves utilities from `offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h`
to `llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h` to be reused by other
projects.

Concretely the following changes were made:
- Rename `KernelMetaDataTy` to `AMDGPUKernelMetaData`.
- Remove unused fields `KernelObject`, `KernelSegmentSize`, `ExplicitArgumentCount`
  and `ImplicitArgumentCount` from `AMDGPUKernelMetaData`.
- Return the produced error if `ELFObj.sections()` failed instead of using `cantFail`.
- Added `AGPRCount` field to `AMDGPUKernelMetaData`.
---
 .../Offloading/AMDGPU/ObjectUtilities.h       |  77 +++++
 .../Frontend/Offloading/AMDGPU/CMakeLists.txt |   8 +
 .../Offloading/AMDGPU/ObjectUtilities.cpp     | 249 ++++++++++++++++
 llvm/lib/Frontend/Offloading/CMakeLists.txt   |   2 +
 offload/plugins-nextgen/amdgpu/CMakeLists.txt |   3 +-
 offload/plugins-nextgen/amdgpu/src/rtl.cpp    |  12 +-
 .../amdgpu/utils/UtilitiesRTL.h               | 266 +-----------------
 7 files changed, 356 insertions(+), 261 deletions(-)
 create mode 100644 llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h
 create mode 100644 llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt
 create mode 100644 llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp

diff --git a/llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h b/llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h
new file mode 100644
index 00000000000000..a43e4501813024
--- /dev/null
+++ b/llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h
@@ -0,0 +1,77 @@
+//===---- ObjectUtilities.h - AMDGPU ELF utilities ---------------- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares AMDGPU ELF related utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdint>
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBufferRef.h"
+
+namespace llvm {
+namespace offloading {
+namespace amdgpu {
+/// Check if an image is compatible with current system's environment. The
+/// system environment is given as a 'target-id' which has the form:
+///
+/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
+///
+/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
+/// and is compatible with either '+' or '-'. The HSA runtime returns this
+/// information using the target-id, while we use the ELF header to determine
+/// these features.
+bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
+                              StringRef EnvTargetID);
+
+/// Struct for holding metadata related to AMDGPU kernels, for more information
+/// about the metadata and its meaning see:
+/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3
+struct AMDGPUKernelMetaData {
+  /// Constant indicating that a value is invalid.
+  static constexpr uint32_t KInvalidValue =
+      std::numeric_limits<uint32_t>::max();
+  /// The amount of group segment memory required by a work-group in bytes.
+  uint32_t GroupSegmentList = KInvalidValue;
+  /// The amount of fixed private address space memory required for a work-item
+  /// in bytes.
+  uint32_t PrivateSegmentSize = KInvalidValue;
+  /// Number of scalar registers required by a wavefront.
+  uint32_t SGPRCount = KInvalidValue;
+  /// Number of vector registers required by each work-item.
+  uint32_t VGPRCount = KInvalidValue;
+  /// Number of stores from a scalar register to a register allocator created
+  /// spill location.
+  uint32_t SGPRSpillCount = KInvalidValue;
+  /// Number of stores from a vector register to a register allocator created
+  /// spill location.
+  uint32_t VGPRSpillCount = KInvalidValue;
+  /// Number of accumulator registers required by each work-item.
+  uint32_t AGPRCount = KInvalidValue;
+  /// Corresponds to the OpenCL reqd_work_group_size attribute.
+  uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue,
+                                        KInvalidValue};
+  /// Corresponds to the OpenCL work_group_size_hint attribute.
+  uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue};
+  /// Wavefront size.
+  uint32_t WavefrontSize = KInvalidValue;
+  /// Maximum flat work-group size supported by the kernel in work-items.
+  uint32_t MaxFlatWorkgroupSize = KInvalidValue;
+};
+
+/// Reads AMDGPU specific metadata from the ELF file and propagates the
+/// KernelInfoMap.
+Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
+                                 StringMap<AMDGPUKernelMetaData> &KernelInfoMap,
+                                 uint16_t &ELFABIVersion);
+} // namespace amdgpu
+} // namespace offloading
+} // namespace llvm
diff --git a/llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt b/llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt
new file mode 100644
index 00000000000000..d1d157b0efa315
--- /dev/null
+++ b/llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_llvm_component_library(LLVMFrontendOffloadingAMDGPU
+  ObjectUtilities.cpp
+
+  LINK_COMPONENTS
+  Support
+  BinaryFormat
+  Object
+)
diff --git a/llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp b/llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp
new file mode 100644
index 00000000000000..992ee8df630745
--- /dev/null
+++ b/llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp
@@ -0,0 +1,249 @@
+//===---- ObjectUtilities.cpp - AMDGPU ELF utilities -------------- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines AMDGPU ELF related utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h"
+
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/YAMLTraits.h"
+
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::offloading::amdgpu;
+
+bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
+                                                        uint32_t ImageFlags,
+                                                        StringRef EnvTargetID) {
+  StringRef EnvArch = EnvTargetID.split(":").first;
+
+  // Trivial check if the base processors match.
+  if (EnvArch != ImageArch)
+    return false;
+
+  // Check if the image is requesting xnack on or off.
+  switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
+  case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
+    // The image is 'xnack-' so the environment must be 'xnack-'.
+    if (!EnvTargetID.contains("xnack-"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_XNACK_ON_V4:
+    // The image is 'xnack+' so the environment must be 'xnack+'.
+    if (!EnvTargetID.contains("xnack+"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
+  case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
+  default:
+    break;
+  }
+
+  // Check if the image is requesting sramecc on or off.
+  switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
+  case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
+    // The image is 'sramecc-' so the environment must be 'sramecc-'.
+    if (!EnvTargetID.contains("sramecc-"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
+    // The image is 'sramecc+' so the environment must be 'sramecc+'.
+    if (!EnvTargetID.contains("sramecc+"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
+  case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
+    break;
+  }
+
+  return true;
+}
+
+namespace {
+/// Reads the AMDGPU specific per-kernel-metadata from an image.
+class KernelInfoReader {
+public:
+  KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
+      : KernelInfoMap(KIM) {}
+
+  /// Process ELF note to read AMDGPU metadata from respective information
+  /// fields.
+  Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
+    if (Note.getName() != "AMDGPU")
+      return Error::success(); // We are not interested in other things
+
+    assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
+           "Parse AMDGPU MetaData");
+    auto Desc = Note.getDesc(Align);
+    StringRef MsgPackString =
+        StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
+    msgpack::Document MsgPackDoc;
+    if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
+      return Error::success();
+
+    AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
+    if (!Verifier.verify(MsgPackDoc.getRoot()))
+      return Error::success();
+
+    auto RootMap = MsgPackDoc.getRoot().getMap(true);
+
+    if (auto Err = iterateAMDKernels(RootMap))
+      return Err;
+
+    return Error::success();
+  }
+
+private:
+  /// Extracts the relevant information via simple string look-up in the msgpack
+  /// document elements.
+  Error
+  extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
+                    std::string &KernelName,
+                    offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
+    if (!V.first.isString())
+      return Error::success();
+
+    const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
+      return DK.getString() == SK;
+    };
+
+    const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
+                                           uint32_t *Vals) {
+      assert(DN.isArray() && "MsgPack DocNode is an array node");
+      auto DNA = DN.getArray();
+      assert(DNA.size() == 3 && "ArrayNode has at most three elements");
+
+      int I = 0;
+      for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
+           ++DNABegin) {
+        Vals[I++] = DNABegin->getUInt();
+      }
+    };
+
+    if (IsKey(V.first, ".name")) {
+      KernelName = V.second.toString();
+    } else if (IsKey(V.first, ".sgpr_count")) {
+      KernelData.SGPRCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".sgpr_spill_count")) {
+      KernelData.SGPRSpillCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".vgpr_count")) {
+      KernelData.VGPRCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".vgpr_spill_count")) {
+      KernelData.VGPRSpillCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".agpr_count")) {
+      KernelData.AGPRCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".private_segment_fixed_size")) {
+      KernelData.PrivateSegmentSize = V.second.getUInt();
+    } else if (IsKey(V.first, ".group_segment_fixed_size")) {
+      KernelData.GroupSegmentList = V.second.getUInt();
+    } else if (IsKey(V.first, ".reqd_workgroup_size")) {
+      GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
+    } else if (IsKey(V.first, ".workgroup_size_hint")) {
+      GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
+    } else if (IsKey(V.first, ".wavefront_size")) {
+      KernelData.WavefrontSize = V.second.getUInt();
+    } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
+      KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
+    }
+
+    return Error::success();
+  }
+
+  /// Get the "amdhsa.kernels" element from the msgpack Document
+  Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
+    auto Res = MDN.find("amdhsa.kernels");
+    if (Res == MDN.end())
+      return createStringError(inconvertibleErrorCode(),
+                               "Could not find amdhsa.kernels key");
+
+    auto Pair = *Res;
+    assert(Pair.second.isArray() &&
+           "AMDGPU kernel entries are arrays of entries");
+
+    return Pair.second.getArray();
+  }
+
+  /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
+  /// MapDocNode that either maps a string to a single value (most of them) or
+  /// to another array of things. Currently, we only handle the case that maps
+  /// to scalar value.
+  Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
+    offloading::amdgpu::AMDGPUKernelMetaData KernelData;
+    std::string KernelName;
+    auto Entry = (*It).getMap();
+    for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
+      if (auto Err = extractKernelData(*MI, KernelName, KernelData))
+        return Err;
+
+    KernelInfoMap.insert({KernelName, KernelData});
+    return Error::success();
+  }
+
+  /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
+  Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
+    auto KernelsOrErr = getAMDKernelsArray(MDN);
+    if (auto Err = KernelsOrErr.takeError())
+      return Err;
+
+    auto KernelsArr = *KernelsOrErr;
+    for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
+      if (!It->isMap())
+        continue; // we expect <key,value> pairs
+
+      // Obtain the value for the different entries. Each array entry is a
+      // MapDocNode
+      if (auto Err = generateKernelInfo(It))
+        return Err;
+    }
+    return Error::success();
+  }
+
+  // Kernel names are the keys
+  StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
+};
+} // namespace
+
+Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
+    MemoryBufferRef MemBuffer,
+    StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
+    uint16_t &ELFABIVersion) {
+  Error Err = Error::success(); // Used later as out-parameter
+
+  auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
+  if (auto Err = ELFOrError.takeError())
+    return Err;
+
+  const object::ELF64LEFile ELFObj = ELFOrError.get();
+  Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
+  if (!Sections)
+    return Sections.takeError();
+  KernelInfoReader Reader(KernelInfoMap);
+
+  // Read the code object version from ELF image header
+  auto Header = ELFObj.getHeader();
+  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+  for (const auto &S : *Sections) {
+    if (S.sh_type != ELF::SHT_NOTE)
+      continue;
+
+    for (const auto N : ELFObj.notes(S, Err)) {
+      if (Err)
+        return Err;
+      // Fills the KernelInfoTabel entries in the reader
+      if ((Err = Reader.processNote(N, S.sh_addralign)))
+        return Err;
+    }
+  }
+  return Error::success();
+}
diff --git a/llvm/lib/Frontend/Offloading/CMakeLists.txt b/llvm/lib/Frontend/Offloading/CMakeLists.txt
index 16e0dcfa0e90d6..ac16aef7d61a79 100644
--- a/llvm/lib/Frontend/Offloading/CMakeLists.txt
+++ b/llvm/lib/Frontend/Offloading/CMakeLists.txt
@@ -1,3 +1,5 @@
+add_subdirectory(AMDGPU)
+
 add_llvm_component_library(LLVMFrontendOffloading
   Utility.cpp
   OffloadWrapper.cpp
diff --git a/offload/plugins-nextgen/amdgpu/CMakeLists.txt b/offload/plugins-nextgen/amdgpu/CMakeLists.txt
index 47cd2feefc7288..fef140e529117e 100644
--- a/offload/plugins-nextgen/amdgpu/CMakeLists.txt
+++ b/offload/plugins-nextgen/amdgpu/CMakeLists.txt
@@ -10,11 +10,12 @@ target_include_directories(omptarget.rtl.amdgpu PRIVATE
 
 if(hsa-runtime64_FOUND AND NOT "amdgpu" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS)
   message(STATUS "Building AMDGPU plugin linked against libhsa")
-  target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64)
+  target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloadingAMDGPU)
 else()
   message(STATUS "Building AMDGPU plugin for dlopened libhsa")
   target_include_directories(omptarget.rtl.amdgpu PRIVATE dynamic_hsa)
   target_sources(omptarget.rtl.amdgpu PRIVATE dynamic_hsa/hsa.cpp)
+  target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloadingAMDGPU)
 endif()
 
 # Configure testing for the AMDGPU plugin. We will build tests if we could a
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 604683370cd27d..a434a0089d5f94 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -485,7 +485,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
   findDeviceSymbol(GenericDeviceTy &Device, StringRef SymbolName) const;
 
   /// Get additional info for kernel, e.g., register spill counts
-  std::optional<utils::KernelMetaDataTy>
+  std::optional<offloading::amdgpu::AMDGPUKernelMetaData>
   getKernelInfo(StringRef Identifier) const {
     auto It = KernelInfoMap.find(Identifier);
 
@@ -499,7 +499,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
   /// The exectuable loaded on the agent.
   hsa_executable_t Executable;
   hsa_code_object_t CodeObject;
-  StringMap<utils::KernelMetaDataTy> KernelInfoMap;
+  StringMap<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfoMap;
   uint16_t ELFABIVersion;
 };
 
@@ -600,7 +600,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
   uint32_t ImplicitArgsSize;
 
   /// Additional Info for the AMD GPU Kernel
-  std::optional<utils::KernelMetaDataTy> KernelInfo;
+  std::optional<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfo;
 };
 
 /// Class representing an HSA signal. Signals are used to define dependencies
@@ -3188,9 +3188,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
         utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
     if (!TargeTripleAndFeaturesOrError)
       return TargeTripleAndFeaturesOrError.takeError();
-    return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
-                                           ElfOrErr->getPlatformFlags(),
-                                           *TargeTripleAndFeaturesOrError);
+    return offloading::amdgpu::isImageCompatibleWithEnv(
+        Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
+        *TargeTripleAndFeaturesOrError);
   }
 
   bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
diff --git a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
index 58a3b5df00fac6..702dae1751a2d2 100644
--- a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -17,24 +17,13 @@
 
 #include "omptarget.h"
 
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Error.h"
-
-#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/BinaryFormat/MsgPackDocument.h"
-#include "llvm/Support/MemoryBufferRef.h"
-#include "llvm/Support/YAMLTraits.h"
-
-using namespace llvm::ELF;
+#include "llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h"
 
 namespace llvm {
 namespace omp {
 namespace target {
 namespace plugin {
 namespace utils {
-
 // The implicit arguments of COV5 AMDGPU kernels.
 struct AMDGPUImplicitArgsTy {
   uint32_t BlockCountX;
@@ -61,253 +50,22 @@ inline uint32_t getImplicitArgsSize(uint16_t Version) {
              : sizeof(AMDGPUImplicitArgsTy);
 }
 
-/// Check if an image is compatible with current system's environment. The
-/// system environment is given as a 'target-id' which has the form:
-///
-/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
-///
-/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
-/// and is compatible with either '+' or '-'. The HSA runtime returns this
-/// information using the target-id, while we use the ELF header to determine
-/// these features.
-inline bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
-                                     StringRef EnvTargetID) {
-  StringRef EnvArch = EnvTargetID.split(":").first;
-
-  // Trivial check if the base processors match.
-  if (EnvArch != ImageArch)
-    return false;
-
-  // Check if the image is requesting xnack on or off.
-  switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
-  case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
-    // The image is 'xnack-' so the environment must be 'xnack-'.
-    if (!EnvTargetID.contains("xnack-"))
-      return false;
-    break;
-  case EF_AMDGPU_FEATURE_XNACK_ON_V4:
-    // The image is 'xnack+' so the environment must be 'xnack+'.
-    if (!EnvTargetID.contains("xnack+"))
-      return false;
-    break;
-  case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
-  case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
-  default:
-    break;
-  }
-
-  // Check if the image is requesting sramecc on or off.
-  switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
-  case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
-    // The image is 'sramecc-' so the environment must be 'sramecc-'.
-    if (!EnvTargetID.contains("sramecc-"))
-      return false;
-    break;
-  case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
-    // The image is 'sramecc+' so the environment must be 'sramecc+'.
-    if (!EnvTargetID.contains("sramecc+"))
-      return false;
-    break;
-  case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
-  case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
-    break;
-  }
-
-  return true;
-}
-
-struct KernelMetaDataTy {
-  uint64_t KernelObject;
-  uint32_t GroupSegmentList;
-  uint32_t PrivateSegmentSize;
-  uint32_t SGPRCount;
-  uint32_t VGPRCount;
-  uint32_t SGPRSpillCount;
-  uint32_t VGPRSpillCount;
-  uint32_t KernelSegmentSize;
-  uint32_t ExplicitArgumentCount;
-  uint32_t ImplicitArgumentCount;
-  uint32_t RequestedWorkgroupSize[3];
-  uint32_t WorkgroupSizeHint[3];
-  uint32_t WavefronSize;
-  uint32_t MaxFlatWorkgroupSize;
-};
-namespace {
-
-/// Reads the AMDGPU specific per-kernel-metadata from an image.
-class KernelInfoReader {
-public:
-  KernelInfoReader(StringMap<KernelMetaDataTy> &KIM) : KernelInfoMap(KIM) {}
-
-  /// Process ELF note to read AMDGPU metadata from respective information
-  /// fields.
-  Error processNote(const object::ELF64LE::Note &Note, size_t Align) {
-    if (Note.getName() != "AMDGPU")
-      return Error::success(); // We are not interested in other things
-
-    assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
-           "Parse AMDGPU MetaData");
-    auto Desc = Note.getDesc(Align);
-    StringRef MsgPackString =
-        StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
-    msgpack::Document MsgPackDoc;
-    if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
-      return Error::success();
-
-    AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
-    if (!Verifier.verify(MsgPackDoc.getRoot()))
-      return Error::success();
-
-    auto RootMap = MsgPackDoc.getRoot().getMap(true);
-
-    if (auto Err = iterateAMDKernels(RootMap))
-      return Err;
-
-    return Error::success();
-  }
-
-private:
-  /// Extracts the relevant information via simple string look-up in the msgpack
-  /// document elements.
-  Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
-                          std::string &KernelName,
-                          KernelMetaDataTy &KernelData) {
-    if (!V.first.isString())
-      return Error::success();
-
-    const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
-      return DK.getString() == SK;
-    };
-
-    const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
-                                           uint32_t *Vals) {
-      assert(DN.isArray() && "MsgPack DocNode is an array node");
-      auto DNA = DN.getArray();
-      assert(DNA.size() == 3 && "ArrayNode has at most three elements");
-
-      int I = 0;
-      for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
-           ++DNABegin) {
-        Vals[I++] = DNABegin->getUInt();
-      }
-    };
-
-    if (IsKey(V.first, ".name")) {
-      KernelName = V.second.toString();
-    } else if (IsKey(V.first, ".sgpr_count")) {
-      KernelData.SGPRCount = V.second.getUInt();
-    } else if (IsKey(V.first, ".sgpr_spill_count")) {
-      KernelData.SGPRSpillCount = V.second.getUInt();
-    } else if (IsKey(V.first, ".vgpr_count")) {
-      KernelData.VGPRCount = V.second.getUInt();
-    } else if (IsKey(V.first, ".vgpr_spill_count")) {
-      KernelData.VGPRSpillCount = V.second.getUInt();
-    } else if (IsKey(V.first, ".private_segment_fixed_size")) {
-      KernelData.PrivateSegmentSize = V.second.getUInt();
-    } else if (IsKey(V.first, ".group_segment_fixed_size")) {
-      KernelData.GroupSegmentList = V.second.getUInt();
-    } else if (IsKey(V.first, ".reqd_workgroup_size")) {
-      GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
-    } else if (IsKey(V.first, ".workgroup_size_hint")) {
-      GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
-    } else if (IsKey(V.first, ".wavefront_size")) {
-      KernelData.WavefronSize = V.second.getUInt();
-    } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
-      KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
-    }
-
-    return Error::success();
-  }
-
-  /// Get the "amdhsa.kernels" element from the msgpack Document
-  Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
-    auto Res = MDN.find("amdhsa.kernels");
-    if (Res == MDN.end())
-      return createStringError(inconvertibleErrorCode(),
-                               "Could not find amdhsa.kernels key");
-
-    auto Pair = *Res;
-    assert(Pair.second.isArray() &&
-           "AMDGPU kernel entries are arrays of entries");
-
-    return Pair.second.getArray();
-  }
-
-  /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
-  /// MapDocNode that either maps a string to a single value (most of them) or
-  /// to another array of things. Currently, we only handle the case that maps
-  /// to scalar value.
-  Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
-    KernelMetaDataTy KernelData;
-    std::string KernelName;
-    auto Entry = (*It).getMap();
-    for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
-      if (auto Err = extractKernelData(*MI, KernelName, KernelData))
-        return Err;
-
-    KernelInfoMap.insert({KernelName, KernelData});
-    return Error::success();
-  }
-
-  /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
-  Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
-    auto KernelsOrErr = getAMDKernelsArray(MDN);
-    if (auto Err = KernelsOrErr.takeError())
-      return Err;
-
-    auto KernelsArr = *KernelsOrErr;
-    for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
-      if (!It->isMap())
-        continue; // we expect <key,value> pairs
-
-      // Obtain the value for the different entries. Each array entry is a
-      // MapDocNode
-      if (auto Err = generateKernelInfo(It))
-        return Err;
-    }
-    return Error::success();
-  }
-
-  // Kernel names are the keys
-  StringMap<KernelMetaDataTy> &KernelInfoMap;
-};
-} // namespace
+/// Returns the size in bytes of the implicit arguments of AMDGPU kernels.
+/// `Version` is the ELF ABI version, e.g. COV5.
+uint32_t getImplicitArgsSize(uint16_t Version);
 
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
 /// KernelInfoMap
-inline Error
-readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
-                            StringMap<KernelMetaDataTy> &KernelInfoMap,
-                            uint16_t &ELFABIVersion) {
-  Error Err = Error::success(); // Used later as out-parameter
-
-  auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
-  if (auto Err = ELFOrError.takeError())
+inline Error readAMDGPUMetaDataFromImage(
+    MemoryBufferRef MemBuffer,
+    StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
+    uint16_t &ELFABIVersion) {
+  Error Err = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
+      MemBuffer, KernelInfoMap, ELFABIVersion);
+  if (!Err)
     return Err;
-
-  const object::ELF64LEFile ELFObj = ELFOrError.get();
-  ArrayRef<object::ELF64LE::Shdr> Sections = cantFail(ELFObj.sections());
-  KernelInfoReader Reader(KernelInfoMap);
-
-  // Read the code object version from ELF image header
-  auto Header = ELFObj.getHeader();
-  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
   DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
-
-  for (const auto &S : Sections) {
-    if (S.sh_type != ELF::SHT_NOTE)
-      continue;
-
-    for (const auto N : ELFObj.notes(S, Err)) {
-      if (Err)
-        return Err;
-      // Fills the KernelInfoTabel entries in the reader
-      if ((Err = Reader.processNote(N, S.sh_addralign)))
-        return Err;
-    }
-  }
-
-  return Error::success();
+  return Err;
 }
 
 } // namespace utils

>From 9fa49fac25ad0f84dff9e5fcec0531413c6516ba Mon Sep 17 00:00:00 2001
From: Fabian Mora <fmora.dev at gmail.com>
Date: Mon, 19 Aug 2024 16:09:07 +0000
Subject: [PATCH 2/2] address reviewer comments

---
 .../Offloading/AMDGPU/ObjectUtilities.h       |  77 ------
 .../llvm/Frontend/Offloading/Utility.h        |  60 +++++
 .../Frontend/Offloading/AMDGPU/CMakeLists.txt |   8 -
 .../Offloading/AMDGPU/ObjectUtilities.cpp     | 249 ------------------
 llvm/lib/Frontend/Offloading/CMakeLists.txt   |   3 +-
 llvm/lib/Frontend/Offloading/Utility.cpp      | 232 ++++++++++++++++
 offload/plugins-nextgen/amdgpu/CMakeLists.txt |   4 +-
 .../amdgpu/utils/UtilitiesRTL.h               |   2 +-
 8 files changed, 296 insertions(+), 339 deletions(-)
 delete mode 100644 llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h
 delete mode 100644 llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt
 delete mode 100644 llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp

diff --git a/llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h b/llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h
deleted file mode 100644
index a43e4501813024..00000000000000
--- a/llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h
+++ /dev/null
@@ -1,77 +0,0 @@
-//===---- ObjectUtilities.h - AMDGPU ELF utilities ---------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares AMDGPU ELF related utilities.
-//
-//===----------------------------------------------------------------------===//
-
-#include <cstdint>
-
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBufferRef.h"
-
-namespace llvm {
-namespace offloading {
-namespace amdgpu {
-/// Check if an image is compatible with current system's environment. The
-/// system environment is given as a 'target-id' which has the form:
-///
-/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
-///
-/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
-/// and is compatible with either '+' or '-'. The HSA runtime returns this
-/// information using the target-id, while we use the ELF header to determine
-/// these features.
-bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
-                              StringRef EnvTargetID);
-
-/// Struct for holding metadata related to AMDGPU kernels, for more information
-/// about the metadata and its meaning see:
-/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3
-struct AMDGPUKernelMetaData {
-  /// Constant indicating that a value is invalid.
-  static constexpr uint32_t KInvalidValue =
-      std::numeric_limits<uint32_t>::max();
-  /// The amount of group segment memory required by a work-group in bytes.
-  uint32_t GroupSegmentList = KInvalidValue;
-  /// The amount of fixed private address space memory required for a work-item
-  /// in bytes.
-  uint32_t PrivateSegmentSize = KInvalidValue;
-  /// Number of scalar registers required by a wavefront.
-  uint32_t SGPRCount = KInvalidValue;
-  /// Number of vector registers required by each work-item.
-  uint32_t VGPRCount = KInvalidValue;
-  /// Number of stores from a scalar register to a register allocator created
-  /// spill location.
-  uint32_t SGPRSpillCount = KInvalidValue;
-  /// Number of stores from a vector register to a register allocator created
-  /// spill location.
-  uint32_t VGPRSpillCount = KInvalidValue;
-  /// Number of accumulator registers required by each work-item.
-  uint32_t AGPRCount = KInvalidValue;
-  /// Corresponds to the OpenCL reqd_work_group_size attribute.
-  uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue,
-                                        KInvalidValue};
-  /// Corresponds to the OpenCL work_group_size_hint attribute.
-  uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue};
-  /// Wavefront size.
-  uint32_t WavefrontSize = KInvalidValue;
-  /// Maximum flat work-group size supported by the kernel in work-items.
-  uint32_t MaxFlatWorkgroupSize = KInvalidValue;
-};
-
-/// Reads AMDGPU specific metadata from the ELF file and propagates the
-/// KernelInfoMap.
-Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
-                                 StringMap<AMDGPUKernelMetaData> &KernelInfoMap,
-                                 uint16_t &ELFABIVersion);
-} // namespace amdgpu
-} // namespace offloading
-} // namespace llvm
diff --git a/llvm/include/llvm/Frontend/Offloading/Utility.h b/llvm/include/llvm/Frontend/Offloading/Utility.h
index f54dd7ba7ab45f..abaea843848b21 100644
--- a/llvm/include/llvm/Frontend/Offloading/Utility.h
+++ b/llvm/include/llvm/Frontend/Offloading/Utility.h
@@ -9,8 +9,14 @@
 #ifndef LLVM_FRONTEND_OFFLOADING_UTILITY_H
 #define LLVM_FRONTEND_OFFLOADING_UTILITY_H
 
+#include <cstdint>
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Object/OffloadBinary.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBufferRef.h"
 
 namespace llvm {
 namespace offloading {
@@ -73,6 +79,60 @@ getOffloadingEntryInitializer(Module &M, Constant *Addr, StringRef Name,
 std::pair<GlobalVariable *, GlobalVariable *>
 getOffloadEntryArray(Module &M, StringRef SectionName);
 
+namespace amdgpu {
+/// Check if an image is compatible with current system's environment. The
+/// system environment is given as a 'target-id' which has the form:
+///
+/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
+///
+/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
+/// and is compatible with either '+' or '-'. The HSA runtime returns this
+/// information using the target-id, while we use the ELF header to determine
+/// these features.
+bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
+                              StringRef EnvTargetID);
+
+/// Struct for holding metadata related to AMDGPU kernels, for more information
+/// about the metadata and its meaning see:
+/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3
+struct AMDGPUKernelMetaData {
+  /// Constant indicating that a value is invalid.
+  static constexpr uint32_t KInvalidValue =
+      std::numeric_limits<uint32_t>::max();
+  /// The amount of group segment memory required by a work-group in bytes.
+  uint32_t GroupSegmentList = KInvalidValue;
+  /// The amount of fixed private address space memory required for a work-item
+  /// in bytes.
+  uint32_t PrivateSegmentSize = KInvalidValue;
+  /// Number of scalar registers required by a wavefront.
+  uint32_t SGPRCount = KInvalidValue;
+  /// Number of vector registers required by each work-item.
+  uint32_t VGPRCount = KInvalidValue;
+  /// Number of stores from a scalar register to a register allocator created
+  /// spill location.
+  uint32_t SGPRSpillCount = KInvalidValue;
+  /// Number of stores from a vector register to a register allocator created
+  /// spill location.
+  uint32_t VGPRSpillCount = KInvalidValue;
+  /// Number of accumulator registers required by each work-item.
+  uint32_t AGPRCount = KInvalidValue;
+  /// Corresponds to the OpenCL reqd_work_group_size attribute.
+  uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue,
+                                        KInvalidValue};
+  /// Corresponds to the OpenCL work_group_size_hint attribute.
+  uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue};
+  /// Wavefront size.
+  uint32_t WavefrontSize = KInvalidValue;
+  /// Maximum flat work-group size supported by the kernel in work-items.
+  uint32_t MaxFlatWorkgroupSize = KInvalidValue;
+};
+
+/// Reads AMDGPU specific metadata from the ELF file and propagates the
+/// KernelInfoMap.
+Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
+                                 StringMap<AMDGPUKernelMetaData> &KernelInfoMap,
+                                 uint16_t &ELFABIVersion);
+} // namespace amdgpu
 } // namespace offloading
 } // namespace llvm
 
diff --git a/llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt b/llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt
deleted file mode 100644
index d1d157b0efa315..00000000000000
--- a/llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-add_llvm_component_library(LLVMFrontendOffloadingAMDGPU
-  ObjectUtilities.cpp
-
-  LINK_COMPONENTS
-  Support
-  BinaryFormat
-  Object
-)
diff --git a/llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp b/llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp
deleted file mode 100644
index 992ee8df630745..00000000000000
--- a/llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp
+++ /dev/null
@@ -1,249 +0,0 @@
-//===---- ObjectUtilities.cpp - AMDGPU ELF utilities -------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines AMDGPU ELF related utilities.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h"
-
-#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/BinaryFormat/MsgPackDocument.h"
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Support/MemoryBufferRef.h"
-#include "llvm/Support/YAMLTraits.h"
-
-using namespace llvm;
-using namespace llvm::ELF;
-using namespace llvm::offloading::amdgpu;
-
-bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
-                                                        uint32_t ImageFlags,
-                                                        StringRef EnvTargetID) {
-  StringRef EnvArch = EnvTargetID.split(":").first;
-
-  // Trivial check if the base processors match.
-  if (EnvArch != ImageArch)
-    return false;
-
-  // Check if the image is requesting xnack on or off.
-  switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
-  case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
-    // The image is 'xnack-' so the environment must be 'xnack-'.
-    if (!EnvTargetID.contains("xnack-"))
-      return false;
-    break;
-  case EF_AMDGPU_FEATURE_XNACK_ON_V4:
-    // The image is 'xnack+' so the environment must be 'xnack+'.
-    if (!EnvTargetID.contains("xnack+"))
-      return false;
-    break;
-  case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
-  case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
-  default:
-    break;
-  }
-
-  // Check if the image is requesting sramecc on or off.
-  switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
-  case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
-    // The image is 'sramecc-' so the environment must be 'sramecc-'.
-    if (!EnvTargetID.contains("sramecc-"))
-      return false;
-    break;
-  case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
-    // The image is 'sramecc+' so the environment must be 'sramecc+'.
-    if (!EnvTargetID.contains("sramecc+"))
-      return false;
-    break;
-  case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
-  case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
-    break;
-  }
-
-  return true;
-}
-
-namespace {
-/// Reads the AMDGPU specific per-kernel-metadata from an image.
-class KernelInfoReader {
-public:
-  KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
-      : KernelInfoMap(KIM) {}
-
-  /// Process ELF note to read AMDGPU metadata from respective information
-  /// fields.
-  Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
-    if (Note.getName() != "AMDGPU")
-      return Error::success(); // We are not interested in other things
-
-    assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
-           "Parse AMDGPU MetaData");
-    auto Desc = Note.getDesc(Align);
-    StringRef MsgPackString =
-        StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
-    msgpack::Document MsgPackDoc;
-    if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
-      return Error::success();
-
-    AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
-    if (!Verifier.verify(MsgPackDoc.getRoot()))
-      return Error::success();
-
-    auto RootMap = MsgPackDoc.getRoot().getMap(true);
-
-    if (auto Err = iterateAMDKernels(RootMap))
-      return Err;
-
-    return Error::success();
-  }
-
-private:
-  /// Extracts the relevant information via simple string look-up in the msgpack
-  /// document elements.
-  Error
-  extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
-                    std::string &KernelName,
-                    offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
-    if (!V.first.isString())
-      return Error::success();
-
-    const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
-      return DK.getString() == SK;
-    };
-
-    const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
-                                           uint32_t *Vals) {
-      assert(DN.isArray() && "MsgPack DocNode is an array node");
-      auto DNA = DN.getArray();
-      assert(DNA.size() == 3 && "ArrayNode has at most three elements");
-
-      int I = 0;
-      for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
-           ++DNABegin) {
-        Vals[I++] = DNABegin->getUInt();
-      }
-    };
-
-    if (IsKey(V.first, ".name")) {
-      KernelName = V.second.toString();
-    } else if (IsKey(V.first, ".sgpr_count")) {
-      KernelData.SGPRCount = V.second.getUInt();
-    } else if (IsKey(V.first, ".sgpr_spill_count")) {
-      KernelData.SGPRSpillCount = V.second.getUInt();
-    } else if (IsKey(V.first, ".vgpr_count")) {
-      KernelData.VGPRCount = V.second.getUInt();
-    } else if (IsKey(V.first, ".vgpr_spill_count")) {
-      KernelData.VGPRSpillCount = V.second.getUInt();
-    } else if (IsKey(V.first, ".agpr_count")) {
-      KernelData.AGPRCount = V.second.getUInt();
-    } else if (IsKey(V.first, ".private_segment_fixed_size")) {
-      KernelData.PrivateSegmentSize = V.second.getUInt();
-    } else if (IsKey(V.first, ".group_segment_fixed_size")) {
-      KernelData.GroupSegmentList = V.second.getUInt();
-    } else if (IsKey(V.first, ".reqd_workgroup_size")) {
-      GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
-    } else if (IsKey(V.first, ".workgroup_size_hint")) {
-      GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
-    } else if (IsKey(V.first, ".wavefront_size")) {
-      KernelData.WavefrontSize = V.second.getUInt();
-    } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
-      KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
-    }
-
-    return Error::success();
-  }
-
-  /// Get the "amdhsa.kernels" element from the msgpack Document
-  Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
-    auto Res = MDN.find("amdhsa.kernels");
-    if (Res == MDN.end())
-      return createStringError(inconvertibleErrorCode(),
-                               "Could not find amdhsa.kernels key");
-
-    auto Pair = *Res;
-    assert(Pair.second.isArray() &&
-           "AMDGPU kernel entries are arrays of entries");
-
-    return Pair.second.getArray();
-  }
-
-  /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
-  /// MapDocNode that either maps a string to a single value (most of them) or
-  /// to another array of things. Currently, we only handle the case that maps
-  /// to scalar value.
-  Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
-    offloading::amdgpu::AMDGPUKernelMetaData KernelData;
-    std::string KernelName;
-    auto Entry = (*It).getMap();
-    for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
-      if (auto Err = extractKernelData(*MI, KernelName, KernelData))
-        return Err;
-
-    KernelInfoMap.insert({KernelName, KernelData});
-    return Error::success();
-  }
-
-  /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
-  Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
-    auto KernelsOrErr = getAMDKernelsArray(MDN);
-    if (auto Err = KernelsOrErr.takeError())
-      return Err;
-
-    auto KernelsArr = *KernelsOrErr;
-    for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
-      if (!It->isMap())
-        continue; // we expect <key,value> pairs
-
-      // Obtain the value for the different entries. Each array entry is a
-      // MapDocNode
-      if (auto Err = generateKernelInfo(It))
-        return Err;
-    }
-    return Error::success();
-  }
-
-  // Kernel names are the keys
-  StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
-};
-} // namespace
-
-Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
-    MemoryBufferRef MemBuffer,
-    StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
-    uint16_t &ELFABIVersion) {
-  Error Err = Error::success(); // Used later as out-parameter
-
-  auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
-  if (auto Err = ELFOrError.takeError())
-    return Err;
-
-  const object::ELF64LEFile ELFObj = ELFOrError.get();
-  Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
-  if (!Sections)
-    return Sections.takeError();
-  KernelInfoReader Reader(KernelInfoMap);
-
-  // Read the code object version from ELF image header
-  auto Header = ELFObj.getHeader();
-  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
-  for (const auto &S : *Sections) {
-    if (S.sh_type != ELF::SHT_NOTE)
-      continue;
-
-    for (const auto N : ELFObj.notes(S, Err)) {
-      if (Err)
-        return Err;
-      // Fills the KernelInfoTabel entries in the reader
-      if ((Err = Reader.processNote(N, S.sh_addralign)))
-        return Err;
-    }
-  }
-  return Error::success();
-}
diff --git a/llvm/lib/Frontend/Offloading/CMakeLists.txt b/llvm/lib/Frontend/Offloading/CMakeLists.txt
index ac16aef7d61a79..ce445ad9cc4cb6 100644
--- a/llvm/lib/Frontend/Offloading/CMakeLists.txt
+++ b/llvm/lib/Frontend/Offloading/CMakeLists.txt
@@ -1,5 +1,3 @@
-add_subdirectory(AMDGPU)
-
 add_llvm_component_library(LLVMFrontendOffloading
   Utility.cpp
   OffloadWrapper.cpp
@@ -13,6 +11,7 @@ add_llvm_component_library(LLVMFrontendOffloading
   LINK_COMPONENTS
   Core
   BinaryFormat
+  Object
   Support
   TransformUtils
   TargetParser
diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp
index 919b9462e32d48..010c0bfd3be76b 100644
--- a/llvm/lib/Frontend/Offloading/Utility.cpp
+++ b/llvm/lib/Frontend/Offloading/Utility.cpp
@@ -7,10 +7,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Frontend/Offloading/Utility.h"
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Value.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/YAMLTraits.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
 using namespace llvm;
@@ -126,3 +132,229 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
 
   return std::make_pair(EntriesB, EntriesE);
 }
+
+bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
+                                                        uint32_t ImageFlags,
+                                                        StringRef EnvTargetID) {
+  using namespace llvm::ELF;
+  StringRef EnvArch = EnvTargetID.split(":").first;
+
+  // Trivial check if the base processors match.
+  if (EnvArch != ImageArch)
+    return false;
+
+  // Check if the image is requesting xnack on or off.
+  switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
+  case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
+    // The image is 'xnack-' so the environment must be 'xnack-'.
+    if (!EnvTargetID.contains("xnack-"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_XNACK_ON_V4:
+    // The image is 'xnack+' so the environment must be 'xnack+'.
+    if (!EnvTargetID.contains("xnack+"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
+  case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
+  default:
+    break;
+  }
+
+  // Check if the image is requesting sramecc on or off.
+  switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
+  case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
+    // The image is 'sramecc-' so the environment must be 'sramecc-'.
+    if (!EnvTargetID.contains("sramecc-"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
+    // The image is 'sramecc+' so the environment must be 'sramecc+'.
+    if (!EnvTargetID.contains("sramecc+"))
+      return false;
+    break;
+  case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
+  case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
+    break;
+  }
+
+  return true;
+}
+
+namespace {
+/// Reads the AMDGPU specific per-kernel-metadata from an image.
+class KernelInfoReader {
+public:
+  KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
+      : KernelInfoMap(KIM) {}
+
+  /// Process ELF note to read AMDGPU metadata from respective information
+  /// fields.
+  Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
+    if (Note.getName() != "AMDGPU")
+      return Error::success(); // We are not interested in other things
+
+    assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
+           "Parse AMDGPU MetaData");
+    auto Desc = Note.getDesc(Align);
+    StringRef MsgPackString =
+        StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
+    msgpack::Document MsgPackDoc;
+    if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
+      return Error::success();
+
+    AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
+    if (!Verifier.verify(MsgPackDoc.getRoot()))
+      return Error::success();
+
+    auto RootMap = MsgPackDoc.getRoot().getMap(true);
+
+    if (auto Err = iterateAMDKernels(RootMap))
+      return Err;
+
+    return Error::success();
+  }
+
+private:
+  /// Extracts the relevant information via simple string look-up in the msgpack
+  /// document elements.
+  Error
+  extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
+                    std::string &KernelName,
+                    offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
+    if (!V.first.isString())
+      return Error::success();
+
+    const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
+      return DK.getString() == SK;
+    };
+
+    const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
+                                           uint32_t *Vals) {
+      assert(DN.isArray() && "MsgPack DocNode is an array node");
+      auto DNA = DN.getArray();
+      assert(DNA.size() == 3 && "ArrayNode has at most three elements");
+
+      int I = 0;
+      for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
+           ++DNABegin) {
+        Vals[I++] = DNABegin->getUInt();
+      }
+    };
+
+    if (IsKey(V.first, ".name")) {
+      KernelName = V.second.toString();
+    } else if (IsKey(V.first, ".sgpr_count")) {
+      KernelData.SGPRCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".sgpr_spill_count")) {
+      KernelData.SGPRSpillCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".vgpr_count")) {
+      KernelData.VGPRCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".vgpr_spill_count")) {
+      KernelData.VGPRSpillCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".agpr_count")) {
+      KernelData.AGPRCount = V.second.getUInt();
+    } else if (IsKey(V.first, ".private_segment_fixed_size")) {
+      KernelData.PrivateSegmentSize = V.second.getUInt();
+    } else if (IsKey(V.first, ".group_segment_fixed_size")) {
+      KernelData.GroupSegmentList = V.second.getUInt();
+    } else if (IsKey(V.first, ".reqd_workgroup_size")) {
+      GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
+    } else if (IsKey(V.first, ".workgroup_size_hint")) {
+      GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
+    } else if (IsKey(V.first, ".wavefront_size")) {
+      KernelData.WavefrontSize = V.second.getUInt();
+    } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
+      KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
+    }
+
+    return Error::success();
+  }
+
+  /// Get the "amdhsa.kernels" element from the msgpack Document
+  Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
+    auto Res = MDN.find("amdhsa.kernels");
+    if (Res == MDN.end())
+      return createStringError(inconvertibleErrorCode(),
+                               "Could not find amdhsa.kernels key");
+
+    auto Pair = *Res;
+    assert(Pair.second.isArray() &&
+           "AMDGPU kernel entries are arrays of entries");
+
+    return Pair.second.getArray();
+  }
+
+  /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
+  /// MapDocNode that either maps a string to a single value (most of them) or
+  /// to another array of things. Currently, we only handle the case that maps
+  /// to scalar value.
+  Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
+    offloading::amdgpu::AMDGPUKernelMetaData KernelData;
+    std::string KernelName;
+    auto Entry = (*It).getMap();
+    for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
+      if (auto Err = extractKernelData(*MI, KernelName, KernelData))
+        return Err;
+
+    KernelInfoMap.insert({KernelName, KernelData});
+    return Error::success();
+  }
+
+  /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
+  Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
+    auto KernelsOrErr = getAMDKernelsArray(MDN);
+    if (auto Err = KernelsOrErr.takeError())
+      return Err;
+
+    auto KernelsArr = *KernelsOrErr;
+    for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
+      if (!It->isMap())
+        continue; // we expect <key,value> pairs
+
+      // Obtain the value for the different entries. Each array entry is a
+      // MapDocNode
+      if (auto Err = generateKernelInfo(It))
+        return Err;
+    }
+    return Error::success();
+  }
+
+  // Kernel names are the keys
+  StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
+};
+} // namespace
+
+Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
+    MemoryBufferRef MemBuffer,
+    StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
+    uint16_t &ELFABIVersion) {
+  Error Err = Error::success(); // Used later as out-parameter
+
+  auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
+  if (auto Err = ELFOrError.takeError())
+    return Err;
+
+  const object::ELF64LEFile ELFObj = ELFOrError.get();
+  Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
+  if (!Sections)
+    return Sections.takeError();
+  KernelInfoReader Reader(KernelInfoMap);
+
+  // Read the code object version from ELF image header
+  auto Header = ELFObj.getHeader();
+  ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+  for (const auto &S : *Sections) {
+    if (S.sh_type != ELF::SHT_NOTE)
+      continue;
+
+    for (const auto N : ELFObj.notes(S, Err)) {
+      if (Err)
+        return Err;
+      // Fills the KernelInfoTabel entries in the reader
+      if ((Err = Reader.processNote(N, S.sh_addralign)))
+        return Err;
+    }
+  }
+  return Error::success();
+}
diff --git a/offload/plugins-nextgen/amdgpu/CMakeLists.txt b/offload/plugins-nextgen/amdgpu/CMakeLists.txt
index fef140e529117e..b40c62d43226f4 100644
--- a/offload/plugins-nextgen/amdgpu/CMakeLists.txt
+++ b/offload/plugins-nextgen/amdgpu/CMakeLists.txt
@@ -10,12 +10,12 @@ target_include_directories(omptarget.rtl.amdgpu PRIVATE
 
 if(hsa-runtime64_FOUND AND NOT "amdgpu" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS)
   message(STATUS "Building AMDGPU plugin linked against libhsa")
-  target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloadingAMDGPU)
+  target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloading)
 else()
   message(STATUS "Building AMDGPU plugin for dlopened libhsa")
   target_include_directories(omptarget.rtl.amdgpu PRIVATE dynamic_hsa)
   target_sources(omptarget.rtl.amdgpu PRIVATE dynamic_hsa/hsa.cpp)
-  target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloadingAMDGPU)
+  target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloading)
 endif()
 
 # Configure testing for the AMDGPU plugin. We will build tests if we could a
diff --git a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
index 702dae1751a2d2..fb87e96d239bcc 100644
--- a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -17,7 +17,7 @@
 
 #include "omptarget.h"
 
-#include "llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h"
+#include "llvm/Frontend/Offloading/Utility.h"
 
 namespace llvm {
 namespace omp {