[llvm] [Offload][AMDGPU] accept generic target (PR #118919)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 5 20:14:43 PST 2024
https://github.com/hidekisaito created https://github.com/llvm/llvm-project/pull/118919
(No pull request description was provided.)
>From 07d439c245b333f516316bff72f98c9b35b6e3f8 Mon Sep 17 00:00:00 2001
From: Hideki Saito <hidekido at amd.com>
Date: Thu, 5 Dec 2024 21:46:06 -0600
Subject: [PATCH] [Offload][AMDGPU] accept generic target
---
offload/DeviceRTL/CMakeLists.txt | 15 ++++--
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 48 +++++++++++--------
.../plugins-nextgen/common/src/Utils/ELF.cpp | 3 +-
3 files changed, 39 insertions(+), 27 deletions(-)
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 32a7510be980d8..b9659ca3d7a9c2 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -42,11 +42,16 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR})
set(include_directory ${devicertl_base_directory}/include)
set(source_directory ${devicertl_base_directory}/src)
-set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
- "gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010"
- "gfx1012;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
- "gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
- "gfx1151;gfx1152;gfx1153")
+set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803"
+ "gfx9-generic;gfx900;gfx902;gfx906;gfx908"
+ "gfx90a;gfx90c"
+ "gfx9-4-generic;gfx940;gfx941;gfx942;gfx950"
+ "gfx10-1-generic;gfx1010;gfx1012"
+ "gfx10-3-generic;gfx1030;gfx1031;gfx1032;gfx1033"
+ "gfx1034;gfx1035;gfx1036"
+ "gfx11-generic;gfx1100;gfx1101;gfx1102;gfx1103"
+ "gfx1150;gfx1151;gfx1152;gfx1153"
+ "gfx12-generic")
set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
"sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90")
set(all_gpu_architectures
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index d74e65d4165679..cdc7f5ae0427ad 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -190,8 +190,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
#endif
}
-Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
- std::string Target;
+Error getTargetTripleAndFeatures(hsa_agent_t Agent,
+ SmallVector<std::string> &Targets) {
auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
uint32_t Length;
hsa_status_t Status;
@@ -205,13 +205,16 @@ Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
return Status;
llvm::StringRef TripleTarget(ISAName.begin(), Length);
- if (TripleTarget.consume_front("amdgcn-amd-amdhsa"))
- Target = TripleTarget.ltrim('-').rtrim('\0').str();
- return HSA_STATUS_INFO_BREAK;
+ if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) {
+ auto Target = TripleTarget.ltrim('-').rtrim('\0').str();
+ if (Target.find("generic") != std::string::npos)
+ Targets.push_back(Target);
+ else
+ Targets[0] = Target;
+ }
+ return HSA_STATUS_SUCCESS;
});
- if (Err)
- return Err;
- return Target;
+ return Err;
}
} // namespace hsa_utils
@@ -1988,12 +1991,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Err;
// Detect if XNACK is enabled
- auto TargeTripleAndFeaturesOrError =
- hsa_utils::getTargetTripleAndFeatures(Agent);
- if (!TargeTripleAndFeaturesOrError)
- return TargeTripleAndFeaturesOrError.takeError();
- if (static_cast<StringRef>(*TargeTripleAndFeaturesOrError)
- .contains("xnack+"))
+ SmallVector<std::string> Targets;
+ Targets.push_back("");
+ if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets))
+ return Err;
+ if (Targets[0].find("xnack+") != std::string::npos)
IsXnackEnabled = true;
// detect if device is an APU.
@@ -3207,13 +3209,17 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
if (!Processor)
return false;
- auto TargeTripleAndFeaturesOrError =
- hsa_utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
- if (!TargeTripleAndFeaturesOrError)
- return TargeTripleAndFeaturesOrError.takeError();
- return offloading::amdgpu::isImageCompatibleWithEnv(
- Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
- *TargeTripleAndFeaturesOrError);
+ SmallVector<std::string> Targets;
+ Targets.push_back("");
+ if (auto Err = hsa_utils::getTargetTripleAndFeatures(
+ getKernelAgent(DeviceId), Targets))
+ return Err;
+ for (auto &Target : Targets)
+ if (offloading::amdgpu::isImageCompatibleWithEnv(
+ Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
+ Target))
+ return true;
+ return false;
}
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
index f5037611e72e0e..10b32440dc8778 100644
--- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
@@ -68,7 +68,8 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
return createError("Invalid AMD ABI version, must be version 4 or above");
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
- (Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201)
+ (Header.e_flags & EF_AMDGPU_MACH) >
+ EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
return createError("Unsupported AMDGPU architecture");
} else if (Header.e_machine == EM_CUDA) {
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
More information about the llvm-commits mailing list