[llvm] cfc76b6 - [llvm][offload] Move AMDGPU offload utilities to LLVM (#102487)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 06:03:12 PDT 2024
Author: Fabian Mora
Date: 2024-08-20T09:03:06-04:00
New Revision: cfc76b6498a3c03268b3f319249afa4512d68e1c
URL: https://github.com/llvm/llvm-project/commit/cfc76b6498a3c03268b3f319249afa4512d68e1c
DIFF: https://github.com/llvm/llvm-project/commit/cfc76b6498a3c03268b3f319249afa4512d68e1c.diff
LOG: [llvm][offload] Move AMDGPU offload utilities to LLVM (#102487)
This patch moves utilities from
`offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h` to
`llvm/Frontend/Offloading/Utility.h` to be reused by
other projects.
Concretely the following changes were made:
- Rename `KernelMetaDataTy` to `AMDGPUKernelMetaData`.
- Remove unused fields `KernelObject`, `KernelSegmentSize`,
`ExplicitArgumentCount` and `ImplicitArgumentCount` from
`AMDGPUKernelMetaData`.
- Return the produced error if `ELFObj.sections()` failed instead of
using `cantFail`.
- Added `AGPRCount` field to `AMDGPUKernelMetaData`.
- Added a default invalid value to all the fields in
`AMDGPUKernelMetaData`.
Added:
Modified:
llvm/include/llvm/Frontend/Offloading/Utility.h
llvm/lib/Frontend/Offloading/CMakeLists.txt
llvm/lib/Frontend/Offloading/Utility.cpp
offload/plugins-nextgen/amdgpu/CMakeLists.txt
offload/plugins-nextgen/amdgpu/src/rtl.cpp
offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
Removed:
################################################################################
diff --git a/llvm/include/llvm/Frontend/Offloading/Utility.h b/llvm/include/llvm/Frontend/Offloading/Utility.h
index f54dd7ba7ab45f..abaea843848b21 100644
--- a/llvm/include/llvm/Frontend/Offloading/Utility.h
+++ b/llvm/include/llvm/Frontend/Offloading/Utility.h
@@ -9,8 +9,14 @@
#ifndef LLVM_FRONTEND_OFFLOADING_UTILITY_H
#define LLVM_FRONTEND_OFFLOADING_UTILITY_H
+#include <cstdint>
+
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/OffloadBinary.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBufferRef.h"
namespace llvm {
namespace offloading {
@@ -73,6 +79,60 @@ getOffloadingEntryInitializer(Module &M, Constant *Addr, StringRef Name,
std::pair<GlobalVariable *, GlobalVariable *>
getOffloadEntryArray(Module &M, StringRef SectionName);
+namespace amdgpu {
+/// Check if an image is compatible with current system's environment. The
+/// system environment is given as a 'target-id' which has the form:
+///
+/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
+///
+/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
+/// and is compatible with either '+' or '-'. The HSA runtime returns this
+/// information using the target-id, while we use the ELF header to determine
+/// these features.
+bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
+ StringRef EnvTargetID);
+
+/// Struct for holding metadata related to AMDGPU kernels, for more information
+/// about the metadata and its meaning see:
+/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3
+struct AMDGPUKernelMetaData {
+ /// Constant indicating that a value is invalid.
+ static constexpr uint32_t KInvalidValue =
+ std::numeric_limits<uint32_t>::max();
+ /// The amount of group segment memory required by a work-group in bytes.
+ uint32_t GroupSegmentList = KInvalidValue;
+ /// The amount of fixed private address space memory required for a work-item
+ /// in bytes.
+ uint32_t PrivateSegmentSize = KInvalidValue;
+ /// Number of scalar registers required by a wavefront.
+ uint32_t SGPRCount = KInvalidValue;
+ /// Number of vector registers required by each work-item.
+ uint32_t VGPRCount = KInvalidValue;
+ /// Number of stores from a scalar register to a register allocator created
+ /// spill location.
+ uint32_t SGPRSpillCount = KInvalidValue;
+ /// Number of stores from a vector register to a register allocator created
+ /// spill location.
+ uint32_t VGPRSpillCount = KInvalidValue;
+ /// Number of accumulator registers required by each work-item.
+ uint32_t AGPRCount = KInvalidValue;
+ /// Corresponds to the OpenCL reqd_work_group_size attribute.
+ uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue,
+ KInvalidValue};
+ /// Corresponds to the OpenCL work_group_size_hint attribute.
+ uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue};
+ /// Wavefront size.
+ uint32_t WavefrontSize = KInvalidValue;
+ /// Maximum flat work-group size supported by the kernel in work-items.
+ uint32_t MaxFlatWorkgroupSize = KInvalidValue;
+};
+
+/// Reads AMDGPU specific metadata from the ELF file and propagates the
+/// KernelInfoMap.
+Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
+ StringMap<AMDGPUKernelMetaData> &KernelInfoMap,
+ uint16_t &ELFABIVersion);
+} // namespace amdgpu
} // namespace offloading
} // namespace llvm
diff --git a/llvm/lib/Frontend/Offloading/CMakeLists.txt b/llvm/lib/Frontend/Offloading/CMakeLists.txt
index 16e0dcfa0e90d6..ce445ad9cc4cb6 100644
--- a/llvm/lib/Frontend/Offloading/CMakeLists.txt
+++ b/llvm/lib/Frontend/Offloading/CMakeLists.txt
@@ -11,6 +11,7 @@ add_llvm_component_library(LLVMFrontendOffloading
LINK_COMPONENTS
Core
BinaryFormat
+ Object
Support
TransformUtils
TargetParser
diff --git a/llvm/lib/Frontend/Offloading/Utility.cpp b/llvm/lib/Frontend/Offloading/Utility.cpp
index 919b9462e32d48..010c0bfd3be76b 100644
--- a/llvm/lib/Frontend/Offloading/Utility.cpp
+++ b/llvm/lib/Frontend/Offloading/Utility.cpp
@@ -7,10 +7,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Frontend/Offloading/Utility.h"
+#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
+#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Value.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/MemoryBufferRef.h"
+#include "llvm/Support/YAMLTraits.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@@ -126,3 +132,229 @@ offloading::getOffloadEntryArray(Module &M, StringRef SectionName) {
return std::make_pair(EntriesB, EntriesE);
}
+
+bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
+ uint32_t ImageFlags,
+ StringRef EnvTargetID) {
+ using namespace llvm::ELF;
+ StringRef EnvArch = EnvTargetID.split(":").first;
+
+ // Trivial check if the base processors match.
+ if (EnvArch != ImageArch)
+ return false;
+
+ // Check if the image is requesting xnack on or off.
+ switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
+ case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
+ // The image is 'xnack-' so the environment must be 'xnack-'.
+ if (!EnvTargetID.contains("xnack-"))
+ return false;
+ break;
+ case EF_AMDGPU_FEATURE_XNACK_ON_V4:
+ // The image is 'xnack+' so the environment must be 'xnack+'.
+ if (!EnvTargetID.contains("xnack+"))
+ return false;
+ break;
+ case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
+ case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
+ default:
+ break;
+ }
+
+ // Check if the image is requesting sramecc on or off.
+ switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
+ case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
+ // The image is 'sramecc-' so the environment must be 'sramecc-'.
+ if (!EnvTargetID.contains("sramecc-"))
+ return false;
+ break;
+ case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
+ // The image is 'sramecc+' so the environment must be 'sramecc+'.
+ if (!EnvTargetID.contains("sramecc+"))
+ return false;
+ break;
+ case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
+ case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
+ break;
+ }
+
+ return true;
+}
+
+namespace {
+/// Reads the AMDGPU specific per-kernel-metadata from an image.
+class KernelInfoReader {
+public:
+ KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
+ : KernelInfoMap(KIM) {}
+
+ /// Process ELF note to read AMDGPU metadata from respective information
+ /// fields.
+ Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
+ if (Note.getName() != "AMDGPU")
+ return Error::success(); // We are not interested in other things
+
+ assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
+ "Parse AMDGPU MetaData");
+ auto Desc = Note.getDesc(Align);
+ StringRef MsgPackString =
+ StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
+ msgpack::Document MsgPackDoc;
+ if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
+ return Error::success();
+
+ AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
+ if (!Verifier.verify(MsgPackDoc.getRoot()))
+ return Error::success();
+
+ auto RootMap = MsgPackDoc.getRoot().getMap(true);
+
+ if (auto Err = iterateAMDKernels(RootMap))
+ return Err;
+
+ return Error::success();
+ }
+
+private:
+ /// Extracts the relevant information via simple string look-up in the msgpack
+ /// document elements.
+ Error
+ extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
+ std::string &KernelName,
+ offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
+ if (!V.first.isString())
+ return Error::success();
+
+ const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
+ return DK.getString() == SK;
+ };
+
+ const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
+ uint32_t *Vals) {
+ assert(DN.isArray() && "MsgPack DocNode is an array node");
+ auto DNA = DN.getArray();
+ assert(DNA.size() == 3 && "ArrayNode has at most three elements");
+
+ int I = 0;
+ for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
+ ++DNABegin) {
+ Vals[I++] = DNABegin->getUInt();
+ }
+ };
+
+ if (IsKey(V.first, ".name")) {
+ KernelName = V.second.toString();
+ } else if (IsKey(V.first, ".sgpr_count")) {
+ KernelData.SGPRCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".sgpr_spill_count")) {
+ KernelData.SGPRSpillCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".vgpr_count")) {
+ KernelData.VGPRCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".vgpr_spill_count")) {
+ KernelData.VGPRSpillCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".agpr_count")) {
+ KernelData.AGPRCount = V.second.getUInt();
+ } else if (IsKey(V.first, ".private_segment_fixed_size")) {
+ KernelData.PrivateSegmentSize = V.second.getUInt();
+ } else if (IsKey(V.first, ".group_segment_fixed_size")) {
+ KernelData.GroupSegmentList = V.second.getUInt();
+ } else if (IsKey(V.first, ".reqd_workgroup_size")) {
+ GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
+ } else if (IsKey(V.first, ".workgroup_size_hint")) {
+ GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
+ } else if (IsKey(V.first, ".wavefront_size")) {
+ KernelData.WavefrontSize = V.second.getUInt();
+ } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
+ KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
+ }
+
+ return Error::success();
+ }
+
+ /// Get the "amdhsa.kernels" element from the msgpack Document
+ Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
+ auto Res = MDN.find("amdhsa.kernels");
+ if (Res == MDN.end())
+ return createStringError(inconvertibleErrorCode(),
+ "Could not find amdhsa.kernels key");
+
+ auto Pair = *Res;
+ assert(Pair.second.isArray() &&
+ "AMDGPU kernel entries are arrays of entries");
+
+ return Pair.second.getArray();
+ }
+
+ /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
+ /// MapDocNode that either maps a string to a single value (most of them) or
+ /// to another array of things. Currently, we only handle the case that maps
+ /// to scalar value.
+ Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
+ offloading::amdgpu::AMDGPUKernelMetaData KernelData;
+ std::string KernelName;
+ auto Entry = (*It).getMap();
+ for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
+ if (auto Err = extractKernelData(*MI, KernelName, KernelData))
+ return Err;
+
+ KernelInfoMap.insert({KernelName, KernelData});
+ return Error::success();
+ }
+
+ /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
+ Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
+ auto KernelsOrErr = getAMDKernelsArray(MDN);
+ if (auto Err = KernelsOrErr.takeError())
+ return Err;
+
+ auto KernelsArr = *KernelsOrErr;
+ for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
+ if (!It->isMap())
+ continue; // we expect <key,value> pairs
+
+ // Obtain the value for the
diff erent entries. Each array entry is a
+ // MapDocNode
+ if (auto Err = generateKernelInfo(It))
+ return Err;
+ }
+ return Error::success();
+ }
+
+ // Kernel names are the keys
+ StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
+};
+} // namespace
+
+Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
+ MemoryBufferRef MemBuffer,
+ StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
+ uint16_t &ELFABIVersion) {
+ Error Err = Error::success(); // Used later as out-parameter
+
+ auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
+ if (auto Err = ELFOrError.takeError())
+ return Err;
+
+ const object::ELF64LEFile ELFObj = ELFOrError.get();
+ Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
+ if (!Sections)
+ return Sections.takeError();
+ KernelInfoReader Reader(KernelInfoMap);
+
+ // Read the code object version from ELF image header
+ auto Header = ELFObj.getHeader();
+ ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
+ for (const auto &S : *Sections) {
+ if (S.sh_type != ELF::SHT_NOTE)
+ continue;
+
+ for (const auto N : ELFObj.notes(S, Err)) {
+ if (Err)
+ return Err;
+ // Fills the KernelInfoTabel entries in the reader
+ if ((Err = Reader.processNote(N, S.sh_addralign)))
+ return Err;
+ }
+ }
+ return Error::success();
+}
diff --git a/offload/plugins-nextgen/amdgpu/CMakeLists.txt b/offload/plugins-nextgen/amdgpu/CMakeLists.txt
index 47cd2feefc7288..b40c62d43226f4 100644
--- a/offload/plugins-nextgen/amdgpu/CMakeLists.txt
+++ b/offload/plugins-nextgen/amdgpu/CMakeLists.txt
@@ -10,11 +10,12 @@ target_include_directories(omptarget.rtl.amdgpu PRIVATE
if(hsa-runtime64_FOUND AND NOT "amdgpu" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS)
message(STATUS "Building AMDGPU plugin linked against libhsa")
- target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64)
+ target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloading)
else()
message(STATUS "Building AMDGPU plugin for dlopened libhsa")
target_include_directories(omptarget.rtl.amdgpu PRIVATE dynamic_hsa)
target_sources(omptarget.rtl.amdgpu PRIVATE dynamic_hsa/hsa.cpp)
+ target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloading)
endif()
# Configure testing for the AMDGPU plugin. We will build tests if we could a
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 604683370cd27d..a434a0089d5f94 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -485,7 +485,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
findDeviceSymbol(GenericDeviceTy &Device, StringRef SymbolName) const;
/// Get additional info for kernel, e.g., register spill counts
- std::optional<utils::KernelMetaDataTy>
+ std::optional<offloading::amdgpu::AMDGPUKernelMetaData>
getKernelInfo(StringRef Identifier) const {
auto It = KernelInfoMap.find(Identifier);
@@ -499,7 +499,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
/// The exectuable loaded on the agent.
hsa_executable_t Executable;
hsa_code_object_t CodeObject;
- StringMap<utils::KernelMetaDataTy> KernelInfoMap;
+ StringMap<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfoMap;
uint16_t ELFABIVersion;
};
@@ -600,7 +600,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
uint32_t ImplicitArgsSize;
/// Additional Info for the AMD GPU Kernel
- std::optional<utils::KernelMetaDataTy> KernelInfo;
+ std::optional<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfo;
};
/// Class representing an HSA signal. Signals are used to define dependencies
@@ -3188,9 +3188,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
if (!TargeTripleAndFeaturesOrError)
return TargeTripleAndFeaturesOrError.takeError();
- return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
- ElfOrErr->getPlatformFlags(),
- *TargeTripleAndFeaturesOrError);
+ return offloading::amdgpu::isImageCompatibleWithEnv(
+ Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
+ *TargeTripleAndFeaturesOrError);
}
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
diff --git a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
index 58a3b5df00fac6..0b6bc50ebf1d89 100644
--- a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -17,24 +17,13 @@
#include "omptarget.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Error.h"
-
-#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
-#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/BinaryFormat/MsgPackDocument.h"
-#include "llvm/Support/MemoryBufferRef.h"
-#include "llvm/Support/YAMLTraits.h"
-
-using namespace llvm::ELF;
+#include "llvm/Frontend/Offloading/Utility.h"
namespace llvm {
namespace omp {
namespace target {
namespace plugin {
namespace utils {
-
// The implicit arguments of COV5 AMDGPU kernels.
struct AMDGPUImplicitArgsTy {
uint32_t BlockCountX;
@@ -55,259 +44,26 @@ struct AMDGPUImplicitArgsTyCOV4 {
uint8_t Unused[56];
};
+/// Returns the size in bytes of the implicit arguments of AMDGPU kernels.
+/// `Version` is the ELF ABI version, e.g. COV5.
inline uint32_t getImplicitArgsSize(uint16_t Version) {
return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
? sizeof(AMDGPUImplicitArgsTyCOV4)
: sizeof(AMDGPUImplicitArgsTy);
}
-/// Check if an image is compatible with current system's environment. The
-/// system environment is given as a 'target-id' which has the form:
-///
-/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
-///
-/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
-/// and is compatible with either '+' or '-'. The HSA runtime returns this
-/// information using the target-id, while we use the ELF header to determine
-/// these features.
-inline bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
- StringRef EnvTargetID) {
- StringRef EnvArch = EnvTargetID.split(":").first;
-
- // Trivial check if the base processors match.
- if (EnvArch != ImageArch)
- return false;
-
- // Check if the image is requesting xnack on or off.
- switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
- case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
- // The image is 'xnack-' so the environment must be 'xnack-'.
- if (!EnvTargetID.contains("xnack-"))
- return false;
- break;
- case EF_AMDGPU_FEATURE_XNACK_ON_V4:
- // The image is 'xnack+' so the environment must be 'xnack+'.
- if (!EnvTargetID.contains("xnack+"))
- return false;
- break;
- case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
- case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
- default:
- break;
- }
-
- // Check if the image is requesting sramecc on or off.
- switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
- case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
- // The image is 'sramecc-' so the environment must be 'sramecc-'.
- if (!EnvTargetID.contains("sramecc-"))
- return false;
- break;
- case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
- // The image is 'sramecc+' so the environment must be 'sramecc+'.
- if (!EnvTargetID.contains("sramecc+"))
- return false;
- break;
- case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
- case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
- break;
- }
-
- return true;
-}
-
-struct KernelMetaDataTy {
- uint64_t KernelObject;
- uint32_t GroupSegmentList;
- uint32_t PrivateSegmentSize;
- uint32_t SGPRCount;
- uint32_t VGPRCount;
- uint32_t SGPRSpillCount;
- uint32_t VGPRSpillCount;
- uint32_t KernelSegmentSize;
- uint32_t ExplicitArgumentCount;
- uint32_t ImplicitArgumentCount;
- uint32_t RequestedWorkgroupSize[3];
- uint32_t WorkgroupSizeHint[3];
- uint32_t WavefronSize;
- uint32_t MaxFlatWorkgroupSize;
-};
-namespace {
-
-/// Reads the AMDGPU specific per-kernel-metadata from an image.
-class KernelInfoReader {
-public:
- KernelInfoReader(StringMap<KernelMetaDataTy> &KIM) : KernelInfoMap(KIM) {}
-
- /// Process ELF note to read AMDGPU metadata from respective information
- /// fields.
- Error processNote(const object::ELF64LE::Note &Note, size_t Align) {
- if (Note.getName() != "AMDGPU")
- return Error::success(); // We are not interested in other things
-
- assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
- "Parse AMDGPU MetaData");
- auto Desc = Note.getDesc(Align);
- StringRef MsgPackString =
- StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
- msgpack::Document MsgPackDoc;
- if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
- return Error::success();
-
- AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
- if (!Verifier.verify(MsgPackDoc.getRoot()))
- return Error::success();
-
- auto RootMap = MsgPackDoc.getRoot().getMap(true);
-
- if (auto Err = iterateAMDKernels(RootMap))
- return Err;
-
- return Error::success();
- }
-
-private:
- /// Extracts the relevant information via simple string look-up in the msgpack
- /// document elements.
- Error extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
- std::string &KernelName,
- KernelMetaDataTy &KernelData) {
- if (!V.first.isString())
- return Error::success();
-
- const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
- return DK.getString() == SK;
- };
-
- const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
- uint32_t *Vals) {
- assert(DN.isArray() && "MsgPack DocNode is an array node");
- auto DNA = DN.getArray();
- assert(DNA.size() == 3 && "ArrayNode has at most three elements");
-
- int I = 0;
- for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
- ++DNABegin) {
- Vals[I++] = DNABegin->getUInt();
- }
- };
-
- if (IsKey(V.first, ".name")) {
- KernelName = V.second.toString();
- } else if (IsKey(V.first, ".sgpr_count")) {
- KernelData.SGPRCount = V.second.getUInt();
- } else if (IsKey(V.first, ".sgpr_spill_count")) {
- KernelData.SGPRSpillCount = V.second.getUInt();
- } else if (IsKey(V.first, ".vgpr_count")) {
- KernelData.VGPRCount = V.second.getUInt();
- } else if (IsKey(V.first, ".vgpr_spill_count")) {
- KernelData.VGPRSpillCount = V.second.getUInt();
- } else if (IsKey(V.first, ".private_segment_fixed_size")) {
- KernelData.PrivateSegmentSize = V.second.getUInt();
- } else if (IsKey(V.first, ".group_segment_fixed_size")) {
- KernelData.GroupSegmentList = V.second.getUInt();
- } else if (IsKey(V.first, ".reqd_workgroup_size")) {
- GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
- } else if (IsKey(V.first, ".workgroup_size_hint")) {
- GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
- } else if (IsKey(V.first, ".wavefront_size")) {
- KernelData.WavefronSize = V.second.getUInt();
- } else if (IsKey(V.first, ".max_flat_workgroup_size")) {
- KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
- }
-
- return Error::success();
- }
-
- /// Get the "amdhsa.kernels" element from the msgpack Document
- Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
- auto Res = MDN.find("amdhsa.kernels");
- if (Res == MDN.end())
- return createStringError(inconvertibleErrorCode(),
- "Could not find amdhsa.kernels key");
-
- auto Pair = *Res;
- assert(Pair.second.isArray() &&
- "AMDGPU kernel entries are arrays of entries");
-
- return Pair.second.getArray();
- }
-
- /// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
- /// MapDocNode that either maps a string to a single value (most of them) or
- /// to another array of things. Currently, we only handle the case that maps
- /// to scalar value.
- Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
- KernelMetaDataTy KernelData;
- std::string KernelName;
- auto Entry = (*It).getMap();
- for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
- if (auto Err = extractKernelData(*MI, KernelName, KernelData))
- return Err;
-
- KernelInfoMap.insert({KernelName, KernelData});
- return Error::success();
- }
-
- /// Go over the list of AMD kernels in the "amdhsa.kernels" entry
- Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
- auto KernelsOrErr = getAMDKernelsArray(MDN);
- if (auto Err = KernelsOrErr.takeError())
- return Err;
-
- auto KernelsArr = *KernelsOrErr;
- for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
- if (!It->isMap())
- continue; // we expect <key,value> pairs
-
- // Obtain the value for the
diff erent entries. Each array entry is a
- // MapDocNode
- if (auto Err = generateKernelInfo(It))
- return Err;
- }
- return Error::success();
- }
-
- // Kernel names are the keys
- StringMap<KernelMetaDataTy> &KernelInfoMap;
-};
-} // namespace
-
/// Reads the AMDGPU specific metadata from the ELF file and propagates the
/// KernelInfoMap
-inline Error
-readAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
- StringMap<KernelMetaDataTy> &KernelInfoMap,
- uint16_t &ELFABIVersion) {
- Error Err = Error::success(); // Used later as out-parameter
-
- auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
- if (auto Err = ELFOrError.takeError())
+inline Error readAMDGPUMetaDataFromImage(
+ MemoryBufferRef MemBuffer,
+ StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
+ uint16_t &ELFABIVersion) {
+ Error Err = llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
+ MemBuffer, KernelInfoMap, ELFABIVersion);
+ if (!Err)
return Err;
-
- const object::ELF64LEFile ELFObj = ELFOrError.get();
- ArrayRef<object::ELF64LE::Shdr> Sections = cantFail(ELFObj.sections());
- KernelInfoReader Reader(KernelInfoMap);
-
- // Read the code object version from ELF image header
- auto Header = ELFObj.getHeader();
- ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
DP("ELFABIVERSION Version: %u\n", ELFABIVersion);
-
- for (const auto &S : Sections) {
- if (S.sh_type != ELF::SHT_NOTE)
- continue;
-
- for (const auto N : ELFObj.notes(S, Err)) {
- if (Err)
- return Err;
- // Fills the KernelInfoTabel entries in the reader
- if ((Err = Reader.processNote(N, S.sh_addralign)))
- return Err;
- }
- }
-
- return Error::success();
+ return Err;
}
} // namespace utils
More information about the llvm-commits
mailing list