[llvm] [Offload][AMDGPU] accept generic target (PR #118919)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 9 12:28:59 PST 2024
https://github.com/hidekisaito updated https://github.com/llvm/llvm-project/pull/118919
>From 07d439c245b333f516316bff72f98c9b35b6e3f8 Mon Sep 17 00:00:00 2001
From: Hideki Saito <hidekido at amd.com>
Date: Thu, 5 Dec 2024 21:46:06 -0600
Subject: [PATCH 1/4] [Offload][AMDGPU] accept generic target
---
offload/DeviceRTL/CMakeLists.txt | 15 ++++--
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 48 +++++++++++--------
.../plugins-nextgen/common/src/Utils/ELF.cpp | 3 +-
3 files changed, 39 insertions(+), 27 deletions(-)
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 32a7510be980d8..b9659ca3d7a9c2 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -42,11 +42,16 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR})
set(include_directory ${devicertl_base_directory}/include)
set(source_directory ${devicertl_base_directory}/src)
-set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
- "gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010"
- "gfx1012;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
- "gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
- "gfx1151;gfx1152;gfx1153")
+set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803"
+ "gfx9-generic;gfx900;gfx902;gfx906;gfx908"
+ "gfx90a;gfx90c"
+ "gfx9-4-generic;gfx940;gfx941;gfx942;gfx950"
+ "gfx10-1-generic;gfx1010;gfx1012"
+ "gfx10-3-generic;gfx1030;gfx1031;gfx1032;gfx1033"
+ "gfx1034;gfx1035;gfx1036"
+ "gfx11-generic;gfx1100;gfx1101;gfx1102;gfx1103"
+ "gfx1150;gfx1151;gfx1152;gfx1153"
+ "gfx12-generic")
set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
"sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90")
set(all_gpu_architectures
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index d74e65d4165679..cdc7f5ae0427ad 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -190,8 +190,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
#endif
}
-Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
- std::string Target;
+Error getTargetTripleAndFeatures(hsa_agent_t Agent,
+ SmallVector<std::string> &Targets) {
auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
uint32_t Length;
hsa_status_t Status;
@@ -205,13 +205,16 @@ Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
return Status;
llvm::StringRef TripleTarget(ISAName.begin(), Length);
- if (TripleTarget.consume_front("amdgcn-amd-amdhsa"))
- Target = TripleTarget.ltrim('-').rtrim('\0').str();
- return HSA_STATUS_INFO_BREAK;
+ if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) {
+ auto Target = TripleTarget.ltrim('-').rtrim('\0').str();
+ if (Target.find("generic") != std::string::npos)
+ Targets.push_back(Target);
+ else
+ Targets[0] = Target;
+ }
+ return HSA_STATUS_SUCCESS;
});
- if (Err)
- return Err;
- return Target;
+ return Err;
}
} // namespace hsa_utils
@@ -1988,12 +1991,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Err;
// Detect if XNACK is enabled
- auto TargeTripleAndFeaturesOrError =
- hsa_utils::getTargetTripleAndFeatures(Agent);
- if (!TargeTripleAndFeaturesOrError)
- return TargeTripleAndFeaturesOrError.takeError();
- if (static_cast<StringRef>(*TargeTripleAndFeaturesOrError)
- .contains("xnack+"))
+ SmallVector<std::string> Targets;
+ Targets.push_back("");
+ if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets))
+ return Err;
+ if (Targets[0].find("xnack+") != std::string::npos)
IsXnackEnabled = true;
// detect if device is an APU.
@@ -3207,13 +3209,17 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
if (!Processor)
return false;
- auto TargeTripleAndFeaturesOrError =
- hsa_utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
- if (!TargeTripleAndFeaturesOrError)
- return TargeTripleAndFeaturesOrError.takeError();
- return offloading::amdgpu::isImageCompatibleWithEnv(
- Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
- *TargeTripleAndFeaturesOrError);
+ SmallVector<std::string> Targets;
+ Targets.push_back("");
+ if (auto Err = hsa_utils::getTargetTripleAndFeatures(
+ getKernelAgent(DeviceId), Targets))
+ return Err;
+ for (auto &Target : Targets)
+ if (offloading::amdgpu::isImageCompatibleWithEnv(
+ Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
+ Target))
+ return true;
+ return false;
}
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
index f5037611e72e0e..10b32440dc8778 100644
--- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
@@ -68,7 +68,8 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
return createError("Invalid AMD ABI version, must be version 4 or above");
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
- (Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201)
+ (Header.e_flags & EF_AMDGPU_MACH) >
+ EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
return createError("Unsupported AMDGPU architecture");
} else if (Header.e_machine == EM_CUDA) {
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
>From 84399a95253478736ff25ae8f23745a3febf37d8 Mon Sep 17 00:00:00 2001
From: Hideki Saito <hidekido at amd.com>
Date: Fri, 6 Dec 2024 18:50:56 -0600
Subject: [PATCH 2/4] Code Review adjustments
---
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 44 ++++++++++++----------
1 file changed, 25 insertions(+), 19 deletions(-)
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index cdc7f5ae0427ad..5fc4a19d8ae144 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -190,8 +190,9 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
#endif
}
-Error getTargetTripleAndFeatures(hsa_agent_t Agent,
- SmallVector<std::string> &Targets) {
+Expected<StringRef>
+getTargetTripleAndFeatures(hsa_agent_t Agent, SmallVector<StringRef> &Targets) {
+ StringRef SpecificTarget;
auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
uint32_t Length;
hsa_status_t Status;
@@ -206,15 +207,18 @@ Error getTargetTripleAndFeatures(hsa_agent_t Agent,
llvm::StringRef TripleTarget(ISAName.begin(), Length);
if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) {
- auto Target = TripleTarget.ltrim('-').rtrim('\0').str();
- if (Target.find("generic") != std::string::npos)
- Targets.push_back(Target);
- else
- Targets[0] = Target;
+ auto Target = TripleTarget.ltrim('-').rtrim('\0');
+ Targets.push_back(Target);
+ if (!Target.ends_with("generic"))
+ SpecificTarget = Target; // Expect one (and only one) to be found
}
return HSA_STATUS_SUCCESS;
});
- return Err;
+ if (Err)
+ return Err;
+ if (SpecificTarget.empty())
+ return Plugin::error("Specific Target ISA not found");
+ return SpecificTarget;
}
} // namespace hsa_utils
@@ -1991,11 +1995,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Err;
// Detect if XNACK is enabled
- SmallVector<std::string> Targets;
- Targets.push_back("");
- if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets))
- return Err;
- if (Targets[0].find("xnack+") != std::string::npos)
+ SmallVector<StringRef> Targets;
+ auto TargeTripleAndFeaturesOrError =
+ hsa_utils::getTargetTripleAndFeatures(Agent, Targets);
+ if (!TargeTripleAndFeaturesOrError)
+ return TargeTripleAndFeaturesOrError.takeError();
+ if (static_cast<StringRef>(*TargeTripleAndFeaturesOrError)
+ .contains("xnack+"))
IsXnackEnabled = true;
// detect if device is an APU.
@@ -3209,15 +3215,15 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
if (!Processor)
return false;
- SmallVector<std::string> Targets;
- Targets.push_back("");
- if (auto Err = hsa_utils::getTargetTripleAndFeatures(
- getKernelAgent(DeviceId), Targets))
- return Err;
+ SmallVector<StringRef> Targets;
+ auto TargetTripleAndFeaturesOrError = hsa_utils::getTargetTripleAndFeatures(
+ getKernelAgent(DeviceId), Targets);
+ if (!TargetTripleAndFeaturesOrError)
+ return TargetTripleAndFeaturesOrError.takeError();
for (auto &Target : Targets)
if (offloading::amdgpu::isImageCompatibleWithEnv(
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
- Target))
+ Target.str()))
return true;
return false;
}
>From 1ac5b2d90a287dd52c7fece7f196303a928235e7 Mon Sep 17 00:00:00 2001
From: Hideki Saito <hidekido at amd.com>
Date: Mon, 9 Dec 2024 11:33:03 -0600
Subject: [PATCH 3/4] Code Review adjustments
---
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 29 +++++++---------------
1 file changed, 9 insertions(+), 20 deletions(-)
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 5fc4a19d8ae144..492b47a909d79a 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -190,9 +190,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
#endif
}
-Expected<StringRef>
-getTargetTripleAndFeatures(hsa_agent_t Agent, SmallVector<StringRef> &Targets) {
- StringRef SpecificTarget;
+Error getTargetTripleAndFeatures(hsa_agent_t Agent,
+ SmallVector<StringRef> &Targets) {
auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
uint32_t Length;
hsa_status_t Status;
@@ -209,16 +208,10 @@ getTargetTripleAndFeatures(hsa_agent_t Agent, SmallVector<StringRef> &Targets) {
if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) {
auto Target = TripleTarget.ltrim('-').rtrim('\0');
Targets.push_back(Target);
- if (!Target.ends_with("generic"))
- SpecificTarget = Target; // Expect one (and only one) to be found
}
return HSA_STATUS_SUCCESS;
});
- if (Err)
- return Err;
- if (SpecificTarget.empty())
- return Plugin::error("Specific Target ISA not found");
- return SpecificTarget;
+ return Err;
}
} // namespace hsa_utils
@@ -1996,12 +1989,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
// Detect if XNACK is enabled
SmallVector<StringRef> Targets;
- auto TargeTripleAndFeaturesOrError =
- hsa_utils::getTargetTripleAndFeatures(Agent, Targets);
- if (!TargeTripleAndFeaturesOrError)
- return TargeTripleAndFeaturesOrError.takeError();
- if (static_cast<StringRef>(*TargeTripleAndFeaturesOrError)
- .contains("xnack+"))
+ if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets))
+ return Err;
+ if (!Targets.empty() && Targets[0].contains("xnack+"))
IsXnackEnabled = true;
// detect if device is an APU.
@@ -3216,10 +3206,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
return false;
SmallVector<StringRef> Targets;
- auto TargetTripleAndFeaturesOrError = hsa_utils::getTargetTripleAndFeatures(
- getKernelAgent(DeviceId), Targets);
- if (!TargetTripleAndFeaturesOrError)
- return TargetTripleAndFeaturesOrError.takeError();
+ if (auto Err = hsa_utils::getTargetTripleAndFeatures(
+ getKernelAgent(DeviceId), Targets))
+ return Err;
for (auto &Target : Targets)
if (offloading::amdgpu::isImageCompatibleWithEnv(
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
>From 8eab21c0279c19bdf631cec141bd55d8ffe6ac34 Mon Sep 17 00:00:00 2001
From: Hideki Saito <hidekido at amd.com>
Date: Mon, 9 Dec 2024 14:28:04 -0600
Subject: [PATCH 4/4] Code Review adjustments
---
offload/plugins-nextgen/amdgpu/src/rtl.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 492b47a909d79a..789b2031ab4a88 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -191,7 +191,7 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
}
Error getTargetTripleAndFeatures(hsa_agent_t Agent,
- SmallVector<StringRef> &Targets) {
+ SmallVector<SmallString<32>> &Targets) {
auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
uint32_t Length;
hsa_status_t Status;
@@ -1988,10 +1988,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Err;
// Detect if XNACK is enabled
- SmallVector<StringRef> Targets;
+ SmallVector<SmallString<32>> Targets;
if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets))
return Err;
- if (!Targets.empty() && Targets[0].contains("xnack+"))
+ if (!Targets.empty() && Targets[0].str().contains("xnack+"))
IsXnackEnabled = true;
// detect if device is an APU.
@@ -3205,7 +3205,7 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
if (!Processor)
return false;
- SmallVector<StringRef> Targets;
+ SmallVector<SmallString<32>> Targets;
if (auto Err = hsa_utils::getTargetTripleAndFeatures(
getKernelAgent(DeviceId), Targets))
return Err;
More information about the llvm-commits mailing list