[llvm] [Offload] Remove handling for COV4 binaries from offload/ (PR #131033)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 12 14:24:34 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Joseph Huber (jhuber6)
<details>
<summary>Changes</summary>
Summary:
We moved from COV4 to COV5 a long time ago, and dropping COV4 support is a
prerequisite for simplifying some front-end code, so we should be able to
remove it now.
---
Full diff: https://github.com/llvm/llvm-project/pull/131033.diff
3 Files Affected:
- (modified) offload/plugins-nextgen/amdgpu/src/rtl.cpp (+11-16)
- (modified) offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h (+1-8)
- (modified) offload/plugins-nextgen/common/src/Utils/ELF.cpp (+2-3)
``````````diff
diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
index e83d38a14f77f..f64d05744f204 100644
--- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp
+++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp
@@ -576,8 +576,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
/// Get the HSA kernel object representing the kernel function.
uint64_t getKernelObject() const { return KernelObject; }
- /// Get the size of implicitargs based on the code object version
- /// @return 56 for cov4 and 256 for cov5
+ /// Get the size of implicitargs based on the code object version.
uint32_t getImplicitArgsSize() const { return ImplicitArgsSize; }
/// Indicates whether or not we need to set up our own private segment size.
@@ -3386,20 +3385,16 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (auto Err = AMDGPUDevice.getStream(AsyncInfoWrapper, Stream))
return Err;
- // Only COV5 implicitargs needs to be set. COV4 implicitargs are not used.
- if (ImplArgs &&
- getImplicitArgsSize() == sizeof(hsa_utils::AMDGPUImplicitArgsTy)) {
- ImplArgs->BlockCountX = NumBlocks[0];
- ImplArgs->BlockCountY = NumBlocks[1];
- ImplArgs->BlockCountZ = NumBlocks[2];
- ImplArgs->GroupSizeX = NumThreads[0];
- ImplArgs->GroupSizeY = NumThreads[1];
- ImplArgs->GroupSizeZ = NumThreads[2];
- ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
- ? 3
- : 1 + (NumBlocks[1] * NumThreads[1] != 1);
- ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
- }
+ ImplArgs->BlockCountX = NumBlocks[0];
+ ImplArgs->BlockCountY = NumBlocks[1];
+ ImplArgs->BlockCountZ = NumBlocks[2];
+ ImplArgs->GroupSizeX = NumThreads[0];
+ ImplArgs->GroupSizeY = NumThreads[1];
+ ImplArgs->GroupSizeZ = NumThreads[2];
+ ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
+ ? 3
+ : 1 + (NumBlocks[1] * NumThreads[1] != 1);
+ ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
// Push the kernel launch into the stream.
return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,
diff --git a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
index 43be4e8edeba4..609ead942dbb3 100644
--- a/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
+++ b/offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h
@@ -40,17 +40,10 @@ struct AMDGPUImplicitArgsTy {
uint8_t Unused2[132]; // 132 byte offset.
};
-// Dummy struct for COV4 implicitargs.
-struct AMDGPUImplicitArgsTyCOV4 {
- uint8_t Unused[56];
-};
-
/// Returns the size in bytes of the implicit arguments of AMDGPU kernels.
/// `Version` is the ELF ABI version, e.g. COV5.
inline uint32_t getImplicitArgsSize(uint16_t Version) {
- return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
- ? sizeof(AMDGPUImplicitArgsTyCOV4)
- : sizeof(AMDGPUImplicitArgsTy);
+ return sizeof(AMDGPUImplicitArgsTy);
}
/// Reads the AMDGPU specific metadata from the ELF file and propagates the
diff --git a/offload/plugins-nextgen/common/src/Utils/ELF.cpp b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
index 44d1c737e2efb..b33101b99aa10 100644
--- a/offload/plugins-nextgen/common/src/Utils/ELF.cpp
+++ b/offload/plugins-nextgen/common/src/Utils/ELF.cpp
@@ -65,10 +65,9 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
if (Header.e_machine == EM_AMDGPU) {
if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
- if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V4 &&
- Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
+ if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
- return createError("Invalid AMD ABI version, must be version 4 or above");
+ return createError("Invalid AMD ABI version, must be version 5 or above");
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
(Header.e_flags & EF_AMDGPU_MACH) >
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
``````````
</details>
https://github.com/llvm/llvm-project/pull/131033
More information about the llvm-commits mailing list