[llvm] [Offload][AMDGPU] accept generic target (PR #118919)

via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 9 09:34:45 PST 2024


hidekisaito (https://github.com/hidekisaito) updated the pull request https://github.com/llvm/llvm-project/pull/118919

>From 07d439c245b333f516316bff72f98c9b35b6e3f8 Mon Sep 17 00:00:00 2001
From: Hideki Saito <hidekido at amd.com>
Date: Thu, 5 Dec 2024 21:46:06 -0600
Subject: [PATCH 1/3] [Offload][AMDGPU] accept generic target

---
 offload/DeviceRTL/CMakeLists.txt              | 15 ++++--
 offload/plugins-nextgen/amdgpu/src/rtl.cpp    | 48 +++++++++++--------
 .../plugins-nextgen/common/src/Utils/ELF.cpp  |  3 +-
 3 files changed, 39 insertions(+), 27 deletions(-)

diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 32a7510be980d8..b9659ca3d7a9c2 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -42,11 +42,16 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR})
 set(include_directory ${devicertl_base_directory}/include)
 set(source_directory ${devicertl_base_directory}/src)
 
-set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
-                             "gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010"
-                             "gfx1012;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
-                             "gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
-                             "gfx1151;gfx1152;gfx1153")
+set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803"
+                             "gfx9-generic;gfx900;gfx902;gfx906;gfx908"
+                             "gfx90a;gfx90c"
+                             "gfx9-4-generic;gfx940;gfx941;gfx942;gfx950"
+                             "gfx10-1-generic;gfx1010;gfx1012"
+                             "gfx10-3-generic;gfx1030;gfx1031;gfx1032;gfx1033"
+                             "gfx1034;gfx1035;gfx1036"
+                             "gfx11-generic;gfx1100;gfx1101;gfx1102;gfx1103"
+                             "gfx1150;gfx1151;gfx1152;gfx1153"
+                             "gfx12-generic")
 set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
                             "sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90")
 set(all_gpu_architectures
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index d74e65d4165679..cdc7f5ae0427ad 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -190,8 +190,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
 #endif
 }
 
-Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
-  std::string Target;
+Error getTargetTripleAndFeatures(hsa_agent_t Agent,
+                                 SmallVector<std::string> &Targets) {
   auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
     uint32_t Length;
     hsa_status_t Status;
@@ -205,13 +205,16 @@ Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
       return Status;
 
     llvm::StringRef TripleTarget(ISAName.begin(), Length);
-    if (TripleTarget.consume_front("amdgcn-amd-amdhsa"))
-      Target = TripleTarget.ltrim('-').rtrim('\0').str();
-    return HSA_STATUS_INFO_BREAK;
+    if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) {
+      auto Target = TripleTarget.ltrim('-').rtrim('\0').str();
+      if (Target.find("generic") != std::string::npos)
+        Targets.push_back(Target);
+      else
+        Targets[0] = Target;
+    }
+    return HSA_STATUS_SUCCESS;
   });
-  if (Err)
-    return Err;
-  return Target;
+  return Err;
 }
 } // namespace hsa_utils
 
@@ -1988,12 +1991,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
       return Err;
 
     // Detect if XNACK is enabled
-    auto TargeTripleAndFeaturesOrError =
-        hsa_utils::getTargetTripleAndFeatures(Agent);
-    if (!TargeTripleAndFeaturesOrError)
-      return TargeTripleAndFeaturesOrError.takeError();
-    if (static_cast<StringRef>(*TargeTripleAndFeaturesOrError)
-            .contains("xnack+"))
+    SmallVector<std::string> Targets;
+    Targets.push_back("");
+    if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets))
+      return Err;
+    if (Targets[0].find("xnack+") != std::string::npos)
       IsXnackEnabled = true;
 
     // detect if device is an APU.
@@ -3207,13 +3209,17 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
     if (!Processor)
       return false;
 
-    auto TargeTripleAndFeaturesOrError =
-        hsa_utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
-    if (!TargeTripleAndFeaturesOrError)
-      return TargeTripleAndFeaturesOrError.takeError();
-    return offloading::amdgpu::isImageCompatibleWithEnv(
-        Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
-        *TargeTripleAndFeaturesOrError);
+    SmallVector<std::string> Targets;
+    Targets.push_back("");
+    if (auto Err = hsa_utils::getTargetTripleAndFeatures(
+            getKernelAgent(DeviceId), Targets))
+      return Err;
+    for (auto &Target : Targets)
+      if (offloading::amdgpu::isImageCompatibleWithEnv(
+              Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
+              Target))
+        return true;
+    return false;
   }
 
   bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
index f5037611e72e0e..10b32440dc8778 100644
--- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
@@ -68,7 +68,8 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
         Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
       return createError("Invalid AMD ABI version, must be version 4 or above");
     if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
-        (Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201)
+        (Header.e_flags & EF_AMDGPU_MACH) >
+            EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
       return createError("Unsupported AMDGPU architecture");
   } else if (Header.e_machine == EM_CUDA) {
     if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)

>From 84399a95253478736ff25ae8f23745a3febf37d8 Mon Sep 17 00:00:00 2001
From: Hideki Saito <hidekido at amd.com>
Date: Fri, 6 Dec 2024 18:50:56 -0600
Subject: [PATCH 2/3] Code Review adjustments

---
 offload/plugins-nextgen/amdgpu/src/rtl.cpp | 44 ++++++++++++----------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index cdc7f5ae0427ad..5fc4a19d8ae144 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -190,8 +190,9 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
 #endif
 }
 
-Error getTargetTripleAndFeatures(hsa_agent_t Agent,
-                                 SmallVector<std::string> &Targets) {
+Expected<StringRef>
+getTargetTripleAndFeatures(hsa_agent_t Agent, SmallVector<StringRef> &Targets) {
+  StringRef SpecificTarget;
   auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
     uint32_t Length;
     hsa_status_t Status;
@@ -206,15 +207,18 @@ Error getTargetTripleAndFeatures(hsa_agent_t Agent,
 
     llvm::StringRef TripleTarget(ISAName.begin(), Length);
     if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) {
-      auto Target = TripleTarget.ltrim('-').rtrim('\0').str();
-      if (Target.find("generic") != std::string::npos)
-        Targets.push_back(Target);
-      else
-        Targets[0] = Target;
+      auto Target = TripleTarget.ltrim('-').rtrim('\0');
+      Targets.push_back(Target);
+      if (!Target.ends_with("generic"))
+        SpecificTarget = Target; // Expect one (and only one) to be found
     }
     return HSA_STATUS_SUCCESS;
   });
-  return Err;
+  if (Err)
+    return Err;
+  if (SpecificTarget.empty())
+    return Plugin::error("Specific Target ISA not found");
+  return SpecificTarget;
 }
 } // namespace hsa_utils
 
@@ -1991,11 +1995,13 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
       return Err;
 
     // Detect if XNACK is enabled
-    SmallVector<std::string> Targets;
-    Targets.push_back("");
-    if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets))
-      return Err;
-    if (Targets[0].find("xnack+") != std::string::npos)
+    SmallVector<StringRef> Targets;
+    auto TargeTripleAndFeaturesOrError =
+        hsa_utils::getTargetTripleAndFeatures(Agent, Targets);
+    if (!TargeTripleAndFeaturesOrError)
+      return TargeTripleAndFeaturesOrError.takeError();
+    if (static_cast<StringRef>(*TargeTripleAndFeaturesOrError)
+            .contains("xnack+"))
       IsXnackEnabled = true;
 
     // detect if device is an APU.
@@ -3209,15 +3215,15 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
     if (!Processor)
       return false;
 
-    SmallVector<std::string> Targets;
-    Targets.push_back("");
-    if (auto Err = hsa_utils::getTargetTripleAndFeatures(
-            getKernelAgent(DeviceId), Targets))
-      return Err;
+    SmallVector<StringRef> Targets;
+    auto TargetTripleAndFeaturesOrError = hsa_utils::getTargetTripleAndFeatures(
+        getKernelAgent(DeviceId), Targets);
+    if (!TargetTripleAndFeaturesOrError)
+      return TargetTripleAndFeaturesOrError.takeError();
     for (auto &Target : Targets)
       if (offloading::amdgpu::isImageCompatibleWithEnv(
               Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
-              Target))
+              Target.str()))
         return true;
     return false;
   }

>From 1ac5b2d90a287dd52c7fece7f196303a928235e7 Mon Sep 17 00:00:00 2001
From: Hideki Saito <hidekido at amd.com>
Date: Mon, 9 Dec 2024 11:33:03 -0600
Subject: [PATCH 3/3] Code Review adjustments

---
 offload/plugins-nextgen/amdgpu/src/rtl.cpp | 29 +++++++---------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index 5fc4a19d8ae144..492b47a909d79a 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -190,9 +190,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
 #endif
 }
 
-Expected<StringRef>
-getTargetTripleAndFeatures(hsa_agent_t Agent, SmallVector<StringRef> &Targets) {
-  StringRef SpecificTarget;
+Error getTargetTripleAndFeatures(hsa_agent_t Agent,
+                                 SmallVector<StringRef> &Targets) {
   auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
     uint32_t Length;
     hsa_status_t Status;
@@ -209,16 +208,10 @@ getTargetTripleAndFeatures(hsa_agent_t Agent, SmallVector<StringRef> &Targets) {
     if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) {
       auto Target = TripleTarget.ltrim('-').rtrim('\0');
       Targets.push_back(Target);
-      if (!Target.ends_with("generic"))
-        SpecificTarget = Target; // Expect one (and only one) to be found
     }
     return HSA_STATUS_SUCCESS;
   });
-  if (Err)
-    return Err;
-  if (SpecificTarget.empty())
-    return Plugin::error("Specific Target ISA not found");
-  return SpecificTarget;
+  return Err;
 }
 } // namespace hsa_utils
 
@@ -1996,12 +1989,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
 
     // Detect if XNACK is enabled
     SmallVector<StringRef> Targets;
-    auto TargeTripleAndFeaturesOrError =
-        hsa_utils::getTargetTripleAndFeatures(Agent, Targets);
-    if (!TargeTripleAndFeaturesOrError)
-      return TargeTripleAndFeaturesOrError.takeError();
-    if (static_cast<StringRef>(*TargeTripleAndFeaturesOrError)
-            .contains("xnack+"))
+    if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets))
+      return Err;
+    if (!Targets.empty() && Targets[0].contains("xnack+"))
       IsXnackEnabled = true;
 
     // detect if device is an APU.
@@ -3216,10 +3206,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
       return false;
 
     SmallVector<StringRef> Targets;
-    auto TargetTripleAndFeaturesOrError = hsa_utils::getTargetTripleAndFeatures(
-        getKernelAgent(DeviceId), Targets);
-    if (!TargetTripleAndFeaturesOrError)
-      return TargetTripleAndFeaturesOrError.takeError();
+    if (auto Err = hsa_utils::getTargetTripleAndFeatures(
+            getKernelAgent(DeviceId), Targets))
+      return Err;
     for (auto &Target : Targets)
       if (offloading::amdgpu::isImageCompatibleWithEnv(
               Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),



More information about the llvm-commits mailing list