[llvm] [Offload] Remove handling for COV4 binaries from offload/ (PR #131033)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 12 14:24:34 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Joseph Huber (jhuber6)

<details>
<summary>Changes</summary>

Summary:
We moved from COV4 to COV5 a long time ago, and the remaining COV4 support
blocks simplifying some front-end code, so we should be able to remove it.


---
Full diff: https://github.com/llvm/llvm-project/pull/131033.diff


3 Files Affected:

- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+11-16) 
- (modified) offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h (+1-8) 
- (modified) offload/plugins-nextgen/common/src/Utils/ELF.cpp (+2-3) 


``````````diff
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index e83d38a14f77f..f64d05744f204 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -576,8 +576,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
   /// Get the HSA kernel object representing the kernel function.
   uint64_t getKernelObject() const { return KernelObject; }
 
-  /// Get the size of implicitargs based on the code object version
-  /// @return 56 for cov4 and 256 for cov5
+  /// Get the size of implicitargs based on the code object version.
   uint32_t getImplicitArgsSize() const { return ImplicitArgsSize; }
 
   /// Indicates whether or not we need to set up our own private segment size.
@@ -3386,20 +3385,16 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
   if (auto Err = AMDGPUDevice.getStream(AsyncInfoWrapper, Stream))
     return Err;
 
-  // Only COV5 implicitargs needs to be set. COV4 implicitargs are not used.
-  if (ImplArgs &&
-      getImplicitArgsSize() == sizeof(hsa_utils::AMDGPUImplicitArgsTy)) {
-    ImplArgs->BlockCountX = NumBlocks[0];
-    ImplArgs->BlockCountY = NumBlocks[1];
-    ImplArgs->BlockCountZ = NumBlocks[2];
-    ImplArgs->GroupSizeX = NumThreads[0];
-    ImplArgs->GroupSizeY = NumThreads[1];
-    ImplArgs->GroupSizeZ = NumThreads[2];
-    ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
-                             ? 3
-                             : 1 + (NumBlocks[1] * NumThreads[1] != 1);
-    ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
-  }
+  ImplArgs->BlockCountX = NumBlocks[0];
+  ImplArgs->BlockCountY = NumBlocks[1];
+  ImplArgs->BlockCountZ = NumBlocks[2];
+  ImplArgs->GroupSizeX = NumThreads[0];
+  ImplArgs->GroupSizeY = NumThreads[1];
+  ImplArgs->GroupSizeZ = NumThreads[2];
+  ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
+                           ? 3
+                           : 1 + (NumBlocks[1] * NumThreads[1] != 1);
+  ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
 
   // Push the kernel launch into the stream.
   return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,
diff --git a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
index 43be4e8edeba4..609ead942dbb3 100644
--- a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -40,17 +40,10 @@ struct AMDGPUImplicitArgsTy {
   uint8_t Unused2[132]; // 132 byte offset.
 };
 
-// Dummy struct for COV4 implicitargs.
-struct AMDGPUImplicitArgsTyCOV4 {
-  uint8_t Unused[56];
-};
-
 /// Returns the size in bytes of the implicit arguments of AMDGPU kernels.
 /// `Version` is the ELF ABI version, e.g. COV5.
 inline uint32_t getImplicitArgsSize(uint16_t Version) {
-  return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
-             ? sizeof(AMDGPUImplicitArgsTyCOV4)
-             : sizeof(AMDGPUImplicitArgsTy);
+  return sizeof(AMDGPUImplicitArgsTy);
 }
 
 /// Reads the AMDGPU specific metadata from the ELF file and propagates the
diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
index 44d1c737e2efb..b33101b99aa10 100644
--- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
@@ -65,10 +65,9 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
   if (Header.e_machine == EM_AMDGPU) {
     if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
       return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
-    if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V4 &&
-        Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
+    if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
         Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
-      return createError("Invalid AMD ABI version, must be version 4 or above");
+      return createError("Invalid AMD ABI version, must be version 5 or above");
     if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
         (Header.e_flags & EF_AMDGPU_MACH) >
             EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)

``````````

</details>


https://github.com/llvm/llvm-project/pull/131033


More information about the llvm-commits mailing list