[llvm] [llvm][offload] Move AMDGPU offload utilities to LLVM (PR #102487)
Fabian Mora via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 19 09:10:06 PDT 2024
https://github.com/fabianmcg updated https://github.com/llvm/llvm-project/pull/102487
>From 4ce897aa25654272dc33036fe81c626c012c8c6a Mon Sep 17 00:00:00 2001
From: Fabian Mora <fmora.dev at gmail.com>
Date: Thu, 8 Aug 2024 14:09:52 +0000
Subject: [PATCH 1/2] [llvm][offload] Move AMDGPU offload utilities to LLVM
This patch moves utilities from `offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h`
to `llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h` to be reused by other
projects.
Concretely the following changes were made:
- Rename `KernelMetaDataTy` to `AMDGPUKernelMetaData`.
- Remove unused fields `KernelObject`, `KernelSegmentSize`, `ExplicitArgumentCount`
and `ImplicitArgumentCount` from `AMDGPUKernelMetaData`.
- Return the produced error if `ELFObj.sections()` failed instead of using `cantFail`.
- Added `AGPRCount` field to `AMDGPUKernelMetaData`.
---
.../Offloading/AMDGPU/ObjectUtilities.h | 77 +++++
.../Frontend/Offloading/AMDGPU/CMakeLists.txt | 8 +
.../Offloading/AMDGPU/ObjectUtilities.cpp | 249 ++++++++++++++++
llvm/lib/Frontend/Offloading/CMakeLists.txt | 2 +
offload/plugins-nextgen/amdgpu/CMakeLists.txt | 3 +-
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 12 +-
.../amdgpu/utils/UtilitiesRTL.h | 266 +-----------------
7 files changed, 356 insertions(+), 261 deletions(-)
create mode 100644 llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h
create mode 100644 llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt
create mode 100644 llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp
diff --git a/llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h b/llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h
new file mode 100644
index 00000000000000..a43e4501813024
--- /dev/null
+++ b/llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h
@@ -0,0 +1,77 @@
+//===---- ObjectUtilities.h - AMDGPU ELF utilities ---------------- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares AMDGPU ELF related utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstdint>
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBufferRef.h"
+
+namespace llvm {
+namespace offloading {
+namespace amdgpu {
+/// Check if an image is compatible with current system's environment. The
+/// system environment is given as a 'target-id' which has the form:
+///
+/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
+///
+/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
+/// and is compatible with either '+' or '-'. The HSA runtime returns this
+/// information using the target-id, while we use the ELF header to determine
+/// these features.
+bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
+ StringRef EnvTargetID);
+
+/// Struct for holding metadata related to AMDGPU kernels, for more information
+/// about the metadata and its meaning see:
+/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3
+struct AMDGPUKernelMetaData {
+ /// Constant indicating that a value is invalid.
+ static constexpr uint32_t KInvalidValue =
+ std::numeric_limits<uint32_t>::max();
+ /// The amount of group segment memory required by a work-group in bytes.
+ uint32_t GroupSegmentList = KInvalidValue;
+ /// The amount of fixed private address space memory required for a work-item
+ /// in bytes.
+ uint32_t PrivateSegmentSize = KInvalidValue;
+ /// Number of scalar registers required by a wavefront.
+ uint32_t SGPRCount = KInvalidValue;
+ /// Number of vector registers required by each work-item.
+ uint32_t VGPRCount = KInvalidValue;
+ /// Number of stores from a scalar register to a register allocator created
+ /// spill location.
+ uint32_t SGPRSpillCount = KInvalidValue;
+ /// Number of stores from a vector register to a register allocator created
+ /// spill location.
+ uint32_t VGPRSpillCount = KInvalidValue;
+ /// Number of accumulator registers required by each work-item.
+ uint32_t AGPRCount = KInvalidValue;
+ /// Corresponds to the OpenCL reqd_work_group_size attribute.
+ uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue,
+ KInvalidValue};
+ /// Corresponds to the OpenCL work_group_size_hint attribute.
+ uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue};
+ /// Wavefront size.
+ uint32_t WavefrontSize = KInvalidValue;
+ /// Maximum flat work-group size supported by the kernel in work-items.
+ uint32_t MaxFlatWorkgroupSize = KInvalidValue;
+};
+
+/// Reads AMDGPU specific metadata from the ELF file and propagates the
+/// KernelInfoMap.
+Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
+ StringMap<AMDGPUKernelMetaData> &KernelInfoMap,
+ uint16_t &ELFABIVersion);
+} // namespace amdgpu
+} // namespace offloading
+} // namespace llvm
diff --git a/llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt b/llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt
new file mode 100644
index 00000000000000..d1d157b0efa315
--- /dev/null
+++ b/llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt
@@ -0,0 +1,8 @@
+add_llvm_component_library(LLVMFrontendOffloadingAMDGPU
+ ObjectUtilities.cpp
+
+ LINK_COMPONENTS
+ Support
+ BinaryFormat
+ Object
+)
diff --git a/llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp b/llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp
new file mode 100644
index 00000000000000..992ee8df630745
--- /dev/null
+++ b/llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp
@@ -0,0 +1,249 @@
+//===---- ObjectUtilities.cpp - AMDGPU ELF utilities -------------- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines AMDGPU ELF related utilities.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h"
+
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/YAMLTraits.h"
+
+using namespace llvm;
+using namespace llvm::ELF;
+using namespace llvm::offloading::amdgpu;
+
+bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
+ uint32_t ImageFlags,
+ StringRef EnvTargetID) {
+ StringRef EnvArch = EnvTargetID.split(":").first;
+
+ // Trivial check if the base processors match.
+ if (EnvArch != ImageArch)
+ return false;
+
+ // Check if the image is requesting xnack on or off.
+ switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
+ case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
+ // The image is 'xnack-' so the environment must be 'xnack-'.
+ if (!EnvTargetID.contains("xnack-"))
+ return false;
+ break;
+ case EF_AMDGPU_FEATURE_XNACK_ON_V4:
+ // The image is 'xnack+' so the environment must be 'xnack+'.
+ if (!EnvTargetID.contains("xnack+"))
+ return false;
+ break;
+ case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
+ case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
+ default:
+ break;
+ }
+
+ // Check if the image is requesting sramecc on or off.
+ switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
+ case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
+ // The image is 'sramecc-' so the environment must be 'sramecc-'.
+ if (!EnvTargetID.contains("sramecc-"))
+ return false;
+ break;
+ case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
+ // The image is 'sramecc+' so the environment must be 'sramecc+'.
+ if (!EnvTargetID.contains("sramecc+"))
+ return false;
+ break;
+ case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
+ case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
+ break;
+ }
+
+ return true;
+}
+
+namespace {
+/// Reads the AMDGPU specific per-kernel-metadata from an image.
+class KernelInfoReader {
+public:
+ KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
+ : KernelInfoMap(KIM) {}
+
+ /// Process ELF note to read AMDGPU metadata from respective information
+ /// fields.
+ Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
+ if (Note.getName() != "AMDGPU")
+ return Error::success(); // We are not interested in other things
+
+ assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
+ "Parse AMDGPU MetaData");
+ auto Desc = Note.getDesc(Align);
+ StringRef MsgPackString =
+ StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
+ msgpack::Document MsgPackDoc;
+ if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
+ return Error::success();
+
+ AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
+ if (!Verifier.verify(MsgPackDoc.getRoot()))
+ return Error::success();
+
+ auto RootMap = MsgPackDoc.getRoot().getMap(true);
+
+ if (auto Err = iterateAMDKernels(RootMap))
+ return Err;
+
+ return Error::success();
+ }
+
+private:
+ /// Extracts the relevant information via simple string look-up in the msgpack
+ /// document elements.
+ Error
+ extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
+ std::string &KernelName,
+ offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
+ if (!V.first.isString())
+ return Error::success();
+
+ const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
+ return DK.getString() == SK;
+ };
+
+ const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
+ uint32_t *Vals) {
+ assert(DN.isArray() && "MsgPack DocNode is an array node");
+ auto DNA = DN.getArray();
+ assert(DNA.size() == 3 && "ArrayNode has at most three elements");
+
+ int I = 0;
+ for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
+ ++DNABegin) {
+ Vals[I++] = DNABegin->getUInt();
+ }
+ };
+
+ if (IsKey(V.first, ".name")) {
+ KernelName = V.second.toString();
+ } else if (IsKey(V.first, ".sgpr_count")) {
+ KernelData.SGPRCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".sgpr_spill_count")) {
+ KernelData.SGPRSpillCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".vgpr_count")) {
+ KernelData.VGPRCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".vgpr_spill_count")) {
+ KernelData.VGPRSpillCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".agpr_count")) {
+ KernelData.AGPRCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".private_segment_fixed_size")) {
+ KernelData.PrivateSegmentSize = V.second.getUInt();
+ } else if (IsKey(V.first, ".group_segment_fixed_size")) {
+ KernelData.GroupSegmentList = V.second.getUInt();
+ } else if (IsKey(V.first, ".reqd_workgroup_size")) {
+ GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
+ } else if (IsKey(V.first, ".workgroup_size_hint")) {
+ GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
+ } else if (IsKey(V.first, ".wavefront_size")) {
+ KernelData.WavefrontSize = V.second.getUInt();
+ } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
+ KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
+ }
+
+ return Error::success();
+ }
+
+ /// Get the "amdhsa.kernels" element from the msgpack Document
+ Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
+ auto Res = MDN.find("amdhsa.kernels");
+ if (Res == MDN.end())
+ return createStringError(inconvertibleErrorCode(),
+ "Could not find amdhsa.kernels key");
+
+ auto Pair = *Res;
+ assert(Pair.second.isArray() &&
+ "AMDGPU kernel entries are arrays of entries");
+
+ return Pair.second.getArray();
+ }
+
+ /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
+ /// MapDocNode that either maps a string to a single value (most of them) or
+ /// to another array of things. Currently, we only handle the case that maps
+ /// to scalar value.
+ Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
+ offloading::amdgpu::AMDGPUKernelMetaData KernelData;
+ std::string KernelName;
+ auto Entry = (*It).getMap();
+ for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
+ if (auto Err = extractKernelData(*MI, KernelName, KernelData))
+ return Err;
+
+ KernelInfoMap.insert({KernelName, KernelData});
+ return Error::success();
+ }
+
+ /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
+ Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
+ auto KernelsOrErr = getAMDKernelsArray(MDN);
+ if (auto Err = KernelsOrErr.takeError())
+ return Err;
+
+ auto KernelsArr = *KernelsOrErr;
+ for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
+ if (!It->isMap())
+ continue; // we expect <key,value> pairs
+
+ // Obtain the value for the different entries. Each array entry is a
+ // MapDocNode
+ if (auto Err = generateKernelInfo(It))
+ return Err;
+ }
+ return Error::success();
+ }
+
+ // Kernel names are the keys
+ StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
+};
+} // namespace
+
+Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
+ MemoryBufferRef MemBuffer,
+ StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
+ uint16_t &ELFABIVersion) {
+ Error Err = Error::success(); // Used later as out-parameter
+
+ auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
+ if (auto Err = ELFOrError.takeError())
+ return Err;
+
+ const object::ELF64LEFile ELFObj = ELFOrError.get();
+ Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
+ if (!Sections)
+ return Sections.takeError();
+ KernelInfoReader Reader(KernelInfoMap);
+
+ // Read the code object version from ELF image header
+ auto Header = ELFObj.getHeader();
+ ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+ for (const auto &S : *Sections) {
+ if (S.sh_type != ELF::SHT_NOTE)
+ continue;
+
+ for (const auto N : ELFObj.notes(S, Err)) {
+ if (Err)
+ return Err;
+ // Fills the KernelInfoTabel entries in the reader
+ if ((Err = Reader.processNote(N, S.sh_addralign)))
+ return Err;
+ }
+ }
+ return Error::success();
+}
diff --git a/llvm/lib/Frontend/Offloading/CMakeLists.txt b/llvm/lib/Frontend/Offloading/CMakeLists.txt
index 16e0dcfa0e90d6..ac16aef7d61a79 100644
--- a/llvm/lib/Frontend/Offloading/CMakeLists.txt
+++ b/llvm/lib/Frontend/Offloading/CMakeLists.txt
@@ -1,3 +1,5 @@
+add_subdirectory(AMDGPU)
+
add_llvm_component_library(LLVMFrontendOffloading
Utility.cpp
OffloadWrapper.cpp
diff --git a/offload/plugins-nextgen/amdgpu/CMakeLists.txt b/offload/plugins-nextgen/amdgpu/CMakeLists.txt
index 47cd2feefc7288..fef140e529117e 100644
--- a/offload/plugins-nextgen/amdgpu/CMakeLists.txt
+++ b/offload/plugins-nextgen/amdgpu/CMakeLists.txt
@@ -10,11 +10,12 @@ target_include_directories(omptarget.rtl.amdgpu PRIVATE
if(hsa-runtime64_FOUND AND NOT "amdgpu" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS)
message(STATUS "Building AMDGPU plugin linked against libhsa")
- target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64)
+ target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloadingAMDGPU)
else()
message(STATUS "Building AMDGPU plugin for dlopened libhsa")
target_include_directories(omptarget.rtl.amdgpu PRIVATE dynamic_hsa)
target_sources(omptarget.rtl.amdgpu PRIVATE dynamic_hsa/hsa.cpp)
+ target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloadingAMDGPU)
endif()
# Configure testing for the AMDGPU plugin. We will build tests if we could a
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 604683370cd27d..a434a0089d5f94 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -485,7 +485,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
findDeviceSymbol(GenericDeviceTy &Device, StringRef SymbolName) const;
/// Get additional info for kernel, e.g., register spill counts
- std::optional<utils::KernelMetaDataTy>
+ std::optional<offloading::amdgpu::AMDGPUKernelMetaData>
getKernelInfo(StringRef Identifier) const {
auto It = KernelInfoMap.find(Identifier);
@@ -499,7 +499,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
/// The exectuable loaded on the agent.
hsa_executable_t Executable;
hsa_code_object_t CodeObject;
- StringMap<utils::KernelMetaDataTy> KernelInfoMap;
+ StringMap<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfoMap;
uint16_t ELFABIVersion;
};
@@ -600,7 +600,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
uint32_t ImplicitArgsSize;
/// Additional Info for the AMD GPU Kernel
- std::optional<utils::KernelMetaDataTy> KernelInfo;
+ std::optional<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfo;
};
/// Class representing an HSA signal. Signals are used to define dependencies
@@ -3188,9 +3188,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
if (!TargeTripleAndFeaturesOrError)
return TargeTripleAndFeaturesOrError.takeError();
- return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
- ElfOrErr->getPlatformFlags(),
- *TargeTripleAndFeaturesOrError);
+ return offloading::amdgpu::isImageCompatibleWithEnv(
+ Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
+ *TargeTripleAndFeaturesOrError);
}
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
diff --git a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
index 58a3b5df00fac6..702dae1751a2d2 100644
--- a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -17,24 +17,13 @@
#include "omptarget.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Error.h"
-
-#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/BinaryFormat/MsgPackDocument.h"
-#include "llvm/Support/MemoryBufferRef.h"
-#include "llvm/Support/YAMLTraits.h"
-
-using namespace llvm::ELF;
+#include "llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h"
namespace llvm {
namespace omp {
namespace target {
namespace plugin {
namespace utils {
-
// The implicit arguments of COV5 AMDGPU kernels.
struct AMDGPUImplicitArgsTy {
uint32_t BlockCountX;
@@ -61,253 +50,22 @@ inline uint32_t getImplicitArgsSize(uint16_t Version) {
: sizeof(AMDGPUImplicitArgsTy);
}
-/// Check if an image is compatible with current system's environment. The
-/// system environment is given as a 'target-id' which has the form:
-///
-/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
-///
-/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
-/// and is compatible with either '+' or '-'. The HSA runtime returns this
-/// information using the target-id, while we use the ELF header to determine
-/// these features.
-inline bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
- StringRef EnvTargetID) {
- StringRef EnvArch = EnvTargetID.split(":").first;
-
- // Trivial check if the base processors match.
- if (EnvArch != ImageArch)
- return false;
-
- // Check if the image is requesting xnack on or off.
- switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
- case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
- // The image is 'xnack-' so the environment must be 'xnack-'.
- if (!EnvTargetID.contains("xnack-"))
- return false;
- break;
- case EF_AMDGPU_FEATURE_XNACK_ON_V4:
- // The image is 'xnack+' so the environment must be 'xnack+'.
- if (!EnvTargetID.contains("xnack+"))
- return false;
- break;
- case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
- case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
- default:
- break;
- }
-
- // Check if the image is requesting sramecc on or off.
- switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
- case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
- // The image is 'sramecc-' so the environment must be 'sramecc-'.
- if (!EnvTargetID.contains("sramecc-"))
- return false;
- break;
- case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
- // The image is 'sramecc+' so the environment must be 'sramecc+'.
- if (!EnvTargetID.contains("sramecc+"))
- return false;
- break;
- case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
- case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
- break;
- }
-
- return true;
-}
-
-struct KernelMetaDataTy {
- uint64_t KernelObject;
- uint32_t GroupSegmentList;
- uint32_t PrivateSegmentSize;
- uint32_t SGPRCount;
- uint32_t VGPRCount;
- uint32_t SGPRSpillCount;
- uint32_t VGPRSpillCount;
- uint32_t KernelSegmentSize;
- uint32_t ExplicitArgumentCount;
- uint32_t ImplicitArgumentCount;
- uint32_t RequestedWorkgroupSize[3];
- uint32_t WorkgroupSizeHint[3];
- uint32_t WavefronSize;
- uint32_t MaxFlatWorkgroupSize;
-};
-namespace {
-
-/// Reads the AMDGPU specific per-kernel-metadata from an image.
-class KernelInfoReader {
-public:
- KernelInfoReader(StringMap<KernelMetaDataTy> &KIM) : KernelInfoMap(KIM) {}
-
- /// Process ELF note to read AMDGPU metadata from respective information
- /// fields.
- Error processNote(const object::ELF64LE::Note &Note, size_t Align) {
- if (Note.getName() != "AMDGPU")
- return Error::success(); // We are not interested in other things
-
- assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
- "Parse AMDGPU MetaData");
- auto Desc = Note.getDesc(Align);
- StringRef MsgPackString =
- StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
- msgpack::Document MsgPackDoc;
- if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
- return Error::success();
-
- AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
- if (!Verifier.verify(MsgPackDoc.getRoot()))
- return Error::success();
-
- auto RootMap = MsgPackDoc.getRoot().getMap(true);
-
- if (auto Err = iterateAMDKernels(RootMap))
- return Err;
-
- return Error::success();
- }
-
-private:
- /// Extracts the relevant information via simple string look-up in the msgpack
- /// document elements.
- Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
- std::string &KernelName,
- KernelMetaDataTy &KernelData) {
- if (!V.first.isString())
- return Error::success();
-
- const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
- return DK.getString() == SK;
- };
-
- const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
- uint32_t *Vals) {
- assert(DN.isArray() && "MsgPack DocNode is an array node");
- auto DNA = DN.getArray();
- assert(DNA.size() == 3 && "ArrayNode has at most three elements");
-
- int I = 0;
- for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
- ++DNABegin) {
- Vals[I++] = DNABegin->getUInt();
- }
- };
-
- if (IsKey(V.first, ".name")) {
- KernelName = V.second.toString();
- } else if (IsKey(V.first, ".sgpr_count")) {
- KernelData.SGPRCount = V.second.getUInt();
- } else if (IsKey(V.first, ".sgpr_spill_count")) {
- KernelData.SGPRSpillCount = V.second.getUInt();
- } else if (IsKey(V.first, ".vgpr_count")) {
- KernelData.VGPRCount = V.second.getUInt();
- } else if (IsKey(V.first, ".vgpr_spill_count")) {
- KernelData.VGPRSpillCount = V.second.getUInt();
- } else if (IsKey(V.first, ".private_segment_fixed_size")) {
- KernelData.PrivateSegmentSize = V.second.getUInt();
- } else if (IsKey(V.first, ".group_segment_fixed_size")) {
- KernelData.GroupSegmentList = V.second.getUInt();
- } else if (IsKey(V.first, ".reqd_workgroup_size")) {
- GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
- } else if (IsKey(V.first, ".workgroup_size_hint")) {
- GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
- } else if (IsKey(V.first, ".wavefront_size")) {
- KernelData.WavefronSize = V.second.getUInt();
- } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
- KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
- }
-
- return Error::success();
- }
-
- /// Get the "amdhsa.kernels" element from the msgpack Document
- Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
- auto Res = MDN.find("amdhsa.kernels");
- if (Res == MDN.end())
- return createStringError(inconvertibleErrorCode(),
- "Could not find amdhsa.kernels key");
-
- auto Pair = *Res;
- assert(Pair.second.isArray() &&
- "AMDGPU kernel entries are arrays of entries");
-
- return Pair.second.getArray();
- }
-
- /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
- /// MapDocNode that either maps a string to a single value (most of them) or
- /// to another array of things. Currently, we only handle the case that maps
- /// to scalar value.
- Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
- KernelMetaDataTy KernelData;
- std::string KernelName;
- auto Entry = (*It).getMap();
- for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
- if (auto Err = extractKernelData(*MI, KernelName, KernelData))
- return Err;
-
- KernelInfoMap.insert({KernelName, KernelData});
- return Error::success();
- }
-
- /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
- Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
- auto KernelsOrErr = getAMDKernelsArray(MDN);
- if (auto Err = KernelsOrErr.takeError())
- return Err;
-
- auto KernelsArr = *KernelsOrErr;
- for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
- if (!It->isMap())
- continue; // we expect <key,value> pairs
-
- // Obtain the value for the different entries. Each array entry is a
- // MapDocNode
- if (auto Err = generateKernelInfo(It))
- return Err;
- }
- return Error::success();
- }
-
- // Kernel names are the keys
- StringMap<KernelMetaDataTy> &KernelInfoMap;
-};
-} // namespace
+/// Returns the size in bytes of the implicit arguments of AMDGPU kernels.
+/// `Version` is the ELF ABI version, e.g. COV5.
+uint32_t getImplicitArgsSize(uint16_t Version);
/// Reads the AMDGPU specific metadata from the ELF file and propagates the
/// KernelInfoMap
-inline Error
-readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
- StringMap<KernelMetaDataTy> &KernelInfoMap,
- uint16_t &ELFABIVersion) {
- Error Err = Error::success(); // Used later as out-parameter
-
- auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
- if (auto Err = ELFOrError.takeError())
+inline Error readAMDGPUMetaDataFromImage(
+ MemoryBufferRef MemBuffer,
+ StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
+ uint16_t &ELFABIVersion) {
+ Error Err = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
+ MemBuffer, KernelInfoMap, ELFABIVersion);
+ if (!Err)
return Err;
-
- const object::ELF64LEFile ELFObj = ELFOrError.get();
- ArrayRef<object::ELF64LE::Shdr> Sections = cantFail(ELFObj.sections());
- KernelInfoReader Reader(KernelInfoMap);
-
- // Read the code object version from ELF image header
- auto Header = ELFObj.getHeader();
- ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
-
- for (const auto &S : Sections) {
- if (S.sh_type != ELF::SHT_NOTE)
- continue;
-
- for (const auto N : ELFObj.notes(S, Err)) {
- if (Err)
- return Err;
- // Fills the KernelInfoTabel entries in the reader
- if ((Err = Reader.processNote(N, S.sh_addralign)))
- return Err;
- }
- }
-
- return Error::success();
+ return Err;
}
} // namespace utils
>From 9fa49fac25ad0f84dff9e5fcec0531413c6516ba Mon Sep 17 00:00:00 2001
From: Fabian Mora <fmora.dev at gmail.com>
Date: Mon, 19 Aug 2024 16:09:07 +0000
Subject: [PATCH 2/2] address reviewer comments
---
.../Offloading/AMDGPU/ObjectUtilities.h | 77 ------
.../llvm/Frontend/Offloading/Utility.h | 60 +++++
.../Frontend/Offloading/AMDGPU/CMakeLists.txt | 8 -
.../Offloading/AMDGPU/ObjectUtilities.cpp | 249 ------------------
llvm/lib/Frontend/Offloading/CMakeLists.txt | 3 +-
llvm/lib/Frontend/Offloading/Utility.cpp | 232 ++++++++++++++++
offload/plugins-nextgen/amdgpu/CMakeLists.txt | 4 +-
.../amdgpu/utils/UtilitiesRTL.h | 2 +-
8 files changed, 296 insertions(+), 339 deletions(-)
delete mode 100644 llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h
delete mode 100644 llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt
delete mode 100644 llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp
diff --git a/llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h b/llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h
deleted file mode 100644
index a43e4501813024..00000000000000
--- a/llvm/include/llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h
+++ /dev/null
@@ -1,77 +0,0 @@
-//===---- ObjectUtilities.h - AMDGPU ELF utilities ---------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares AMDGPU ELF related utilities.
-//
-//===----------------------------------------------------------------------===//
-
-#include <cstdint>
-
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBufferRef.h"
-
-namespace llvm {
-namespace offloading {
-namespace amdgpu {
-/// Check if an image is compatible with current system's environment. The
-/// system environment is given as a 'target-id' which has the form:
-///
-/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
-///
-/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
-/// and is compatible with either '+' or '-'. The HSA runtime returns this
-/// information using the target-id, while we use the ELF header to determine
-/// these features.
-bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
- StringRef EnvTargetID);
-
-/// Struct for holding metadata related to AMDGPU kernels, for more information
-/// about the metadata and its meaning see:
-/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3
-struct AMDGPUKernelMetaData {
- /// Constant indicating that a value is invalid.
- static constexpr uint32_t KInvalidValue =
- std::numeric_limits<uint32_t>::max();
- /// The amount of group segment memory required by a work-group in bytes.
- uint32_t GroupSegmentList = KInvalidValue;
- /// The amount of fixed private address space memory required for a work-item
- /// in bytes.
- uint32_t PrivateSegmentSize = KInvalidValue;
- /// Number of scalar registers required by a wavefront.
- uint32_t SGPRCount = KInvalidValue;
- /// Number of vector registers required by each work-item.
- uint32_t VGPRCount = KInvalidValue;
- /// Number of stores from a scalar register to a register allocator created
- /// spill location.
- uint32_t SGPRSpillCount = KInvalidValue;
- /// Number of stores from a vector register to a register allocator created
- /// spill location.
- uint32_t VGPRSpillCount = KInvalidValue;
- /// Number of accumulator registers required by each work-item.
- uint32_t AGPRCount = KInvalidValue;
- /// Corresponds to the OpenCL reqd_work_group_size attribute.
- uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue,
- KInvalidValue};
- /// Corresponds to the OpenCL work_group_size_hint attribute.
- uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue};
- /// Wavefront size.
- uint32_t WavefrontSize = KInvalidValue;
- /// Maximum flat work-group size supported by the kernel in work-items.
- uint32_t MaxFlatWorkgroupSize = KInvalidValue;
-};
-
-/// Reads AMDGPU specific metadata from the ELF file and propagates the
-/// KernelInfoMap.
-Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
- StringMap<AMDGPUKernelMetaData> &KernelInfoMap,
- uint16_t &ELFABIVersion);
-} // namespace amdgpu
-} // namespace offloading
-} // namespace llvm
diff --git a/llvm/include/llvm/Frontend/Offloading/Utility.h b/llvm/include/llvm/Frontend/Offloading/Utility.h
index f54dd7ba7ab45f..abaea843848b21 100644
--- a/llvm/include/llvm/Frontend/Offloading/Utility.h
+++ b/llvm/include/llvm/Frontend/Offloading/Utility.h
@@ -9,8 +9,14 @@
#ifndef LLVM_FRONTEND_OFFLOADING_UTILITY_H
#define LLVM_FRONTEND_OFFLOADING_UTILITY_H
+#include <cstdint>
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/OffloadBinary.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBufferRef.h"
namespace llvm {
namespace offloading {
@@ -73,6 +79,60 @@ getOffloadingEntryInitializer(Module &M, Constant *Addr, StringRef Name,
std::pair<GlobalVariable *, GlobalVariable *>
getOffloadEntryArray(Module &M, StringRef SectionName);
+namespace amdgpu {
+/// Check if an image is compatible with current system's environment. The
+/// system environment is given as a 'target-id' which has the form:
+///
+/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
+///
+/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
+/// and is compatible with either '+' or '-'. The HSA runtime returns this
+/// information using the target-id, while we use the ELF header to determine
+/// these features.
+bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
+ StringRef EnvTargetID);
+
+/// Struct for holding metadata related to AMDGPU kernels, for more information
+/// about the metadata and its meaning see:
+/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3
+struct AMDGPUKernelMetaData {
+ /// Constant indicating that a value is invalid.
+ static constexpr uint32_t KInvalidValue =
+ std::numeric_limits<uint32_t>::max();
+ /// The amount of group segment memory required by a work-group in bytes.
+ uint32_t GroupSegmentList = KInvalidValue;
+ /// The amount of fixed private address space memory required for a work-item
+ /// in bytes.
+ uint32_t PrivateSegmentSize = KInvalidValue;
+ /// Number of scalar registers required by a wavefront.
+ uint32_t SGPRCount = KInvalidValue;
+ /// Number of vector registers required by each work-item.
+ uint32_t VGPRCount = KInvalidValue;
+ /// Number of stores from a scalar register to a register allocator created
+ /// spill location.
+ uint32_t SGPRSpillCount = KInvalidValue;
+ /// Number of stores from a vector register to a register allocator created
+ /// spill location.
+ uint32_t VGPRSpillCount = KInvalidValue;
+ /// Number of accumulator registers required by each work-item.
+ uint32_t AGPRCount = KInvalidValue;
+ /// Corresponds to the OpenCL reqd_work_group_size attribute.
+ uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue,
+ KInvalidValue};
+ /// Corresponds to the OpenCL work_group_size_hint attribute.
+ uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue};
+ /// Wavefront size.
+ uint32_t WavefrontSize = KInvalidValue;
+ /// Maximum flat work-group size supported by the kernel in work-items.
+ uint32_t MaxFlatWorkgroupSize = KInvalidValue;
+};
+
+/// Reads AMDGPU specific metadata from the ELF file and propagates the
+/// KernelInfoMap.
+Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
+ StringMap<AMDGPUKernelMetaData> &KernelInfoMap,
+ uint16_t &ELFABIVersion);
+} // namespace amdgpu
} // namespace offloading
} // namespace llvm
diff --git a/llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt b/llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt
deleted file mode 100644
index d1d157b0efa315..00000000000000
--- a/llvm/lib/Frontend/Offloading/AMDGPU/CMakeLists.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-add_llvm_component_library(LLVMFrontendOffloadingAMDGPU
- ObjectUtilities.cpp
-
- LINK_COMPONENTS
- Support
- BinaryFormat
- Object
-)
diff --git a/llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp b/llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp
deleted file mode 100644
index 992ee8df630745..00000000000000
--- a/llvm/lib/Frontend/Offloading/AMDGPU/ObjectUtilities.cpp
+++ /dev/null
@@ -1,249 +0,0 @@
-//===---- ObjectUtilities.cpp - AMDGPU ELF utilities -------------- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines AMDGPU ELF related utilities.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h"
-
-#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/BinaryFormat/MsgPackDocument.h"
-#include "llvm/Object/ELFObjectFile.h"
-#include "llvm/Support/MemoryBufferRef.h"
-#include "llvm/Support/YAMLTraits.h"
-
-using namespace llvm;
-using namespace llvm::ELF;
-using namespace llvm::offloading::amdgpu;
-
-bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
- uint32_t ImageFlags,
- StringRef EnvTargetID) {
- StringRef EnvArch = EnvTargetID.split(":").first;
-
- // Trivial check if the base processors match.
- if (EnvArch != ImageArch)
- return false;
-
- // Check if the image is requesting xnack on or off.
- switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
- case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
- // The image is 'xnack-' so the environment must be 'xnack-'.
- if (!EnvTargetID.contains("xnack-"))
- return false;
- break;
- case EF_AMDGPU_FEATURE_XNACK_ON_V4:
- // The image is 'xnack+' so the environment must be 'xnack+'.
- if (!EnvTargetID.contains("xnack+"))
- return false;
- break;
- case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
- case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
- default:
- break;
- }
-
- // Check if the image is requesting sramecc on or off.
- switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
- case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
- // The image is 'sramecc-' so the environment must be 'sramecc-'.
- if (!EnvTargetID.contains("sramecc-"))
- return false;
- break;
- case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
- // The image is 'sramecc+' so the environment must be 'sramecc+'.
- if (!EnvTargetID.contains("sramecc+"))
- return false;
- break;
- case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
- case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
- break;
- }
-
- return true;
-}
-
-namespace {
-/// Reads the AMDGPU specific per-kernel-metadata from an image.
-class KernelInfoReader {
-public:
- KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
- : KernelInfoMap(KIM) {}
-
- /// Process ELF note to read AMDGPU metadata from respective information
- /// fields.
- Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
- if (Note.getName() != "AMDGPU")
- return Error::success(); // We are not interested in other things
-
- assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
- "Parse AMDGPU MetaData");
- auto Desc = Note.getDesc(Align);
- StringRef MsgPackString =
- StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
- msgpack::Document MsgPackDoc;
- if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
- return Error::success();
-
- AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
- if (!Verifier.verify(MsgPackDoc.getRoot()))
- return Error::success();
-
- auto RootMap = MsgPackDoc.getRoot().getMap(true);
-
- if (auto Err = iterateAMDKernels(RootMap))
- return Err;
-
- return Error::success();
- }
-
-private:
- /// Extracts the relevant information via simple string look-up in the msgpack
- /// document elements.
- Error
- extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
- std::string &KernelName,
- offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
- if (!V.first.isString())
- return Error::success();
-
- const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
- return DK.getString() == SK;
- };
-
- const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
- uint32_t *Vals) {
- assert(DN.isArray() && "MsgPack DocNode is an array node");
- auto DNA = DN.getArray();
- assert(DNA.size() == 3 && "ArrayNode has at most three elements");
-
- int I = 0;
- for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
- ++DNABegin) {
- Vals[I++] = DNABegin->getUInt();
- }
- };
-
- if (IsKey(V.first, ".name")) {
- KernelName = V.second.toString();
- } else if (IsKey(V.first, ".sgpr_count")) {
- KernelData.SGPRCount = V.second.getUInt();
- } else if (IsKey(V.first, ".sgpr_spill_count")) {
- KernelData.SGPRSpillCount = V.second.getUInt();
- } else if (IsKey(V.first, ".vgpr_count")) {
- KernelData.VGPRCount = V.second.getUInt();
- } else if (IsKey(V.first, ".vgpr_spill_count")) {
- KernelData.VGPRSpillCount = V.second.getUInt();
- } else if (IsKey(V.first, ".agpr_count")) {
- KernelData.AGPRCount = V.second.getUInt();
- } else if (IsKey(V.first, ".private_segment_fixed_size")) {
- KernelData.PrivateSegmentSize = V.second.getUInt();
- } else if (IsKey(V.first, ".group_segment_fixed_size")) {
- KernelData.GroupSegmentList = V.second.getUInt();
- } else if (IsKey(V.first, ".reqd_workgroup_size")) {
- GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
- } else if (IsKey(V.first, ".workgroup_size_hint")) {
- GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
- } else if (IsKey(V.first, ".wavefront_size")) {
- KernelData.WavefrontSize = V.second.getUInt();
- } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
- KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
- }
-
- return Error::success();
- }
-
- /// Get the "amdhsa.kernels" element from the msgpack Document
- Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
- auto Res = MDN.find("amdhsa.kernels");
- if (Res == MDN.end())
- return createStringError(inconvertibleErrorCode(),
- "Could not find amdhsa.kernels key");
-
- auto Pair = *Res;
- assert(Pair.second.isArray() &&
- "AMDGPU kernel entries are arrays of entries");
-
- return Pair.second.getArray();
- }
-
- /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
- /// MapDocNode that either maps a string to a single value (most of them) or
- /// to another array of things. Currently, we only handle the case that maps
- /// to scalar value.
- Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
- offloading::amdgpu::AMDGPUKernelMetaData KernelData;
- std::string KernelName;
- auto Entry = (*It).getMap();
- for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
- if (auto Err = extractKernelData(*MI, KernelName, KernelData))
- return Err;
-
- KernelInfoMap.insert({KernelName, KernelData});
- return Error::success();
- }
-
- /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
- Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
- auto KernelsOrErr = getAMDKernelsArray(MDN);
- if (auto Err = KernelsOrErr.takeError())
- return Err;
-
- auto KernelsArr = *KernelsOrErr;
- for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
- if (!It->isMap())
- continue; // we expect <key,value> pairs
-
- // Obtain the value for the different entries. Each array entry is a
- // MapDocNode
- if (auto Err = generateKernelInfo(It))
- return Err;
- }
- return Error::success();
- }
-
- // Kernel names are the keys
- StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
-};
-} // namespace
-
-Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
- MemoryBufferRef MemBuffer,
- StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
- uint16_t &ELFABIVersion) {
- Error Err = Error::success(); // Used later as out-parameter
-
- auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
- if (auto Err = ELFOrError.takeError())
- return Err;
-
- const object::ELF64LEFile ELFObj = ELFOrError.get();
- Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
- if (!Sections)
- return Sections.takeError();
- KernelInfoReader Reader(KernelInfoMap);
-
- // Read the code object version from ELF image header
- auto Header = ELFObj.getHeader();
- ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
- for (const auto &S : *Sections) {
- if (S.sh_type != ELF::SHT_NOTE)
- continue;
-
- for (const auto N : ELFObj.notes(S, Err)) {
- if (Err)
- return Err;
- // Fills the KernelInfoTabel entries in the reader
- if ((Err = Reader.processNote(N, S.sh_addralign)))
- return Err;
- }
- }
- return Error::success();
-}
diff --git a/llvm/lib/Frontend/Offloading/CMakeLists.txt b/llvm/lib/Frontend/Offloading/CMakeLists.txt
index ac16aef7d61a79..ce445ad9cc4cb6 100644
--- a/llvm/lib/Frontend/Offloading/CMakeLists.txt
+++ b/llvm/lib/Frontend/Offloading/CMakeLists.txt
@@ -1,5 +1,3 @@
-add_subdirectory(AMDGPU)
-
add_llvm_component_library(LLVMFrontendOffloading
Utility.cpp
OffloadWrapper.cpp
@@ -13,6 +11,7 @@ add_llvm_component_library(LLVMFrontendOffloading
LINK_COMPONENTS
Core
BinaryFormat
+ Object
Support
TransformUtils
TargetParser
diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp
index 919b9462e32d48..010c0bfd3be76b 100644
--- a/llvm/lib/Frontend/Offloading/Utility.cpp
+++ b/llvm/lib/Frontend/Offloading/Utility.cpp
@@ -7,10 +7,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Frontend/Offloading/Utility.h"
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Value.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/YAMLTraits.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@@ -126,3 +132,229 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
return std::make_pair(EntriesB, EntriesE);
}
+
+bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
+ uint32_t ImageFlags,
+ StringRef EnvTargetID) {
+ using namespace llvm::ELF;
+ StringRef EnvArch = EnvTargetID.split(":").first;
+
+ // Trivial check if the base processors match.
+ if (EnvArch != ImageArch)
+ return false;
+
+ // Check if the image is requesting xnack on or off.
+ switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
+ case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
+ // The image is 'xnack-' so the environment must be 'xnack-'.
+ if (!EnvTargetID.contains("xnack-"))
+ return false;
+ break;
+ case EF_AMDGPU_FEATURE_XNACK_ON_V4:
+ // The image is 'xnack+' so the environment must be 'xnack+'.
+ if (!EnvTargetID.contains("xnack+"))
+ return false;
+ break;
+ case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
+ case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
+ default:
+ break;
+ }
+
+ // Check if the image is requesting sramecc on or off.
+ switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
+ case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
+ // The image is 'sramecc-' so the environment must be 'sramecc-'.
+ if (!EnvTargetID.contains("sramecc-"))
+ return false;
+ break;
+ case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
+ // The image is 'sramecc+' so the environment must be 'sramecc+'.
+ if (!EnvTargetID.contains("sramecc+"))
+ return false;
+ break;
+ case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
+ case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
+ break;
+ }
+
+ return true;
+}
+
+namespace {
+/// Reads the AMDGPU specific per-kernel-metadata from an image.
+class KernelInfoReader {
+public:
+ KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
+ : KernelInfoMap(KIM) {}
+
+ /// Process ELF note to read AMDGPU metadata from respective information
+ /// fields.
+ Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
+ if (Note.getName() != "AMDGPU")
+ return Error::success(); // We are not interested in other things
+
+ assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
+ "Parse AMDGPU MetaData");
+ auto Desc = Note.getDesc(Align);
+ StringRef MsgPackString =
+ StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
+ msgpack::Document MsgPackDoc;
+ if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
+ return Error::success();
+
+ AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
+ if (!Verifier.verify(MsgPackDoc.getRoot()))
+ return Error::success();
+
+ auto RootMap = MsgPackDoc.getRoot().getMap(true);
+
+ if (auto Err = iterateAMDKernels(RootMap))
+ return Err;
+
+ return Error::success();
+ }
+
+private:
+ /// Extracts the relevant information via simple string look-up in the msgpack
+ /// document elements.
+ Error
+ extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
+ std::string &KernelName,
+ offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
+ if (!V.first.isString())
+ return Error::success();
+
+ const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
+ return DK.getString() == SK;
+ };
+
+ const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
+ uint32_t *Vals) {
+ assert(DN.isArray() && "MsgPack DocNode is an array node");
+ auto DNA = DN.getArray();
+ assert(DNA.size() == 3 && "ArrayNode has at most three elements");
+
+ int I = 0;
+ for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
+ ++DNABegin) {
+ Vals[I++] = DNABegin->getUInt();
+ }
+ };
+
+ if (IsKey(V.first, ".name")) {
+ KernelName = V.second.toString();
+ } else if (IsKey(V.first, ".sgpr_count")) {
+ KernelData.SGPRCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".sgpr_spill_count")) {
+ KernelData.SGPRSpillCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".vgpr_count")) {
+ KernelData.VGPRCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".vgpr_spill_count")) {
+ KernelData.VGPRSpillCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".agpr_count")) {
+ KernelData.AGPRCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".private_segment_fixed_size")) {
+ KernelData.PrivateSegmentSize = V.second.getUInt();
+ } else if (IsKey(V.first, ".group_segment_fixed_size")) {
+ KernelData.GroupSegmentList = V.second.getUInt();
+ } else if (IsKey(V.first, ".reqd_workgroup_size")) {
+ GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
+ } else if (IsKey(V.first, ".workgroup_size_hint")) {
+ GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
+ } else if (IsKey(V.first, ".wavefront_size")) {
+ KernelData.WavefrontSize = V.second.getUInt();
+ } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
+ KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
+ }
+
+ return Error::success();
+ }
+
+ /// Get the "amdhsa.kernels" element from the msgpack Document
+ Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
+ auto Res = MDN.find("amdhsa.kernels");
+ if (Res == MDN.end())
+ return createStringError(inconvertibleErrorCode(),
+ "Could not find amdhsa.kernels key");
+
+ auto Pair = *Res;
+ assert(Pair.second.isArray() &&
+ "AMDGPU kernel entries are arrays of entries");
+
+ return Pair.second.getArray();
+ }
+
+ /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
+ /// MapDocNode that either maps a string to a single value (most of them) or
+ /// to another array of things. Currently, we only handle the case that maps
+ /// to scalar value.
+ Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
+ offloading::amdgpu::AMDGPUKernelMetaData KernelData;
+ std::string KernelName;
+ auto Entry = (*It).getMap();
+ for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
+ if (auto Err = extractKernelData(*MI, KernelName, KernelData))
+ return Err;
+
+ KernelInfoMap.insert({KernelName, KernelData});
+ return Error::success();
+ }
+
+ /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
+ Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
+ auto KernelsOrErr = getAMDKernelsArray(MDN);
+ if (auto Err = KernelsOrErr.takeError())
+ return Err;
+
+ auto KernelsArr = *KernelsOrErr;
+ for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
+ if (!It->isMap())
+ continue; // we expect <key,value> pairs
+
+ // Obtain the value for the different entries. Each array entry is a
+ // MapDocNode
+ if (auto Err = generateKernelInfo(It))
+ return Err;
+ }
+ return Error::success();
+ }
+
+ // Kernel names are the keys
+ StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
+};
+} // namespace
+
+Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
+ MemoryBufferRef MemBuffer,
+ StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
+ uint16_t &ELFABIVersion) {
+ Error Err = Error::success(); // Used later as out-parameter
+
+ auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
+ if (auto Err = ELFOrError.takeError())
+ return Err;
+
+ const object::ELF64LEFile ELFObj = ELFOrError.get();
+ Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
+ if (!Sections)
+ return Sections.takeError();
+ KernelInfoReader Reader(KernelInfoMap);
+
+ // Read the code object version from ELF image header
+ auto Header = ELFObj.getHeader();
+ ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+ for (const auto &S : *Sections) {
+ if (S.sh_type != ELF::SHT_NOTE)
+ continue;
+
+ for (const auto N : ELFObj.notes(S, Err)) {
+ if (Err)
+ return Err;
+ // Fills the KernelInfoTabel entries in the reader
+ if ((Err = Reader.processNote(N, S.sh_addralign)))
+ return Err;
+ }
+ }
+ return Error::success();
+}
diff --git a/offload/plugins-nextgen/amdgpu/CMakeLists.txt b/offload/plugins-nextgen/amdgpu/CMakeLists.txt
index fef140e529117e..b40c62d43226f4 100644
--- a/offload/plugins-nextgen/amdgpu/CMakeLists.txt
+++ b/offload/plugins-nextgen/amdgpu/CMakeLists.txt
@@ -10,12 +10,12 @@ target_include_directories(omptarget.rtl.amdgpu PRIVATE
if(hsa-runtime64_FOUND AND NOT "amdgpu" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS)
message(STATUS "Building AMDGPU plugin linked against libhsa")
- target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloadingAMDGPU)
+ target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloading)
else()
message(STATUS "Building AMDGPU plugin for dlopened libhsa")
target_include_directories(omptarget.rtl.amdgpu PRIVATE dynamic_hsa)
target_sources(omptarget.rtl.amdgpu PRIVATE dynamic_hsa/hsa.cpp)
- target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloadingAMDGPU)
+ target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloading)
endif()
# Configure testing for the AMDGPU plugin. We will build tests if we could a
diff --git a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
index 702dae1751a2d2..fb87e96d239bcc 100644
--- a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -17,7 +17,7 @@
#include "omptarget.h"
-#include "llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h"
+#include "llvm/Frontend/Offloading/Utility.h"
namespace llvm {
namespace omp {
More information about the llvm-commits
mailing list