[llvm] [AMDGPU] Fix .Lfunc_end label placement (PR #127549)
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 17 15:01:44 PST 2025
https://github.com/rampitec created https://github.com/llvm/llvm-project/pull/127549
Currently the label is placed after the kernel descriptor, even though the
current section at that point is .rodata, which is wrong. Emitting the label
right after the function's code allows proper code size calculation in MC.
>From cf668cca6d8499ad0dcf398a2a9c4f21e0c24d35 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Mon, 17 Feb 2025 14:57:47 -0800
Subject: [PATCH] [AMDGPU] Fix .Lfunc_end label placement
Currently the label is placed after the kernel descriptor, even though the
current section at that point is .rodata, which is wrong. Emitting the label
right after the function's code allows proper code size calculation in MC.
---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 26 ++++++++++++++++---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h | 2 +-
llvm/test/CodeGen/AMDGPU/hsa.ll | 9 ++++---
.../CodeGen/AMDGPU/stack-realign-kernel.ll | 12 +++++++++
4 files changed, 42 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 031d8f0560ff2..950e9d125763e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -33,6 +33,7 @@
#include "Utils/SIDefinesUtils.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -90,6 +91,24 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter() {
createAMDGPUAsmPrinterPass);
}
+namespace {
+class AMDGPUAsmPrinterHandler : public AsmPrinterHandler {
+protected:
+ AMDGPUAsmPrinter *Asm;
+
+public:
+ AMDGPUAsmPrinterHandler(AMDGPUAsmPrinter *A) : Asm(A) {}
+
+ virtual void beginFunction(const MachineFunction *MF) override {}
+
+ virtual void endFunction(const MachineFunction *MF) override {
+ Asm->endFunction(MF);
+ }
+
+ virtual void endModule() override {}
+};
+} // End anonymous namespace
+
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {
@@ -209,13 +228,12 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
}
-void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
+void AMDGPUAsmPrinter::endFunction(const MachineFunction *MF) {
const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
if (!MFI.isEntryFunction())
return;
- if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
- return;
+ assert(TM.getTargetTriple().getOS() == Triple::AMDHSA);
auto &Streamer = getTargetStreamer()->getStreamer();
auto &Context = Streamer.getContext();
@@ -351,6 +369,8 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
default:
report_fatal_error("Unexpected code object version");
}
+
+ addAsmPrinterHandler(std::make_unique<AMDGPUAsmPrinterHandler>(this));
}
return AsmPrinter::doInitialization(M);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index cc8c4411805e2..3a0da0dc33d9d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -120,7 +120,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
void emitFunctionBodyStart() override;
- void emitFunctionBodyEnd() override;
+ void endFunction(const MachineFunction *MF);
void emitImplicitDef(const MachineInstr *MI) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/hsa.ll b/llvm/test/CodeGen/AMDGPU/hsa.ll
index 37476203fbfad..6e5f16feb0773 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa.ll
@@ -96,6 +96,12 @@
; PRE-GFX10: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; GFX10: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off
+; HSA: s_endpgm
+; HSA-NEXT: .Lfunc_end0:
+; HSA-NEXT: .size simple, .Lfunc_end0-simple
+
+; HSA: .section .rodata,"a",@progbits
+
; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
@@ -103,9 +109,6 @@
; GFX10-W32: .amdhsa_wavefront_size32 1
; GFX10-W64: .amdhsa_wavefront_size32 0
-; HSA: .Lfunc_end0:
-; HSA: .size simple, .Lfunc_end0-simple
-
define amdgpu_kernel void @simple(ptr addrspace(1) %out) #0 {
entry:
store i32 0, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
index 6ddf0986755f9..2d34169f9f34c 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
@@ -15,6 +15,8 @@ define amdgpu_kernel void @max_alignment_128() #0 {
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
+; VI-NEXT: .Lfunc_end0:
+; VI-NEXT: .size max_alignment_128, .Lfunc_end0-max_alignment_128
; VI-NEXT: .section .rodata,"a"
; VI-NEXT: .p2align 6
; VI-NEXT: .amdhsa_kernel max_alignment_128
@@ -66,6 +68,8 @@ define amdgpu_kernel void @max_alignment_128() #0 {
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:128
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
+; GFX9-NEXT: .Lfunc_end0:
+; GFX9-NEXT: .size max_alignment_128, .Lfunc_end0-max_alignment_128
; GFX9-NEXT: .section .rodata,"a"
; GFX9-NEXT: .p2align 6
; GFX9-NEXT: .amdhsa_kernel max_alignment_128
@@ -126,6 +130,8 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
+; VI-NEXT: .Lfunc_end1:
+; VI-NEXT: .size stackrealign_attr, .Lfunc_end1-stackrealign_attr
; VI-NEXT: .section .rodata,"a"
; VI-NEXT: .p2align 6
; VI-NEXT: .amdhsa_kernel stackrealign_attr
@@ -177,6 +183,8 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
+; GFX9-NEXT: .Lfunc_end1:
+; GFX9-NEXT: .size stackrealign_attr, .Lfunc_end1-stackrealign_attr
; GFX9-NEXT: .section .rodata,"a"
; GFX9-NEXT: .p2align 6
; GFX9-NEXT: .amdhsa_kernel stackrealign_attr
@@ -237,6 +245,8 @@ define amdgpu_kernel void @alignstack_attr() #2 {
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_endpgm
+; VI-NEXT: .Lfunc_end2:
+; VI-NEXT: .size alignstack_attr, .Lfunc_end2-alignstack_attr
; VI-NEXT: .section .rodata,"a"
; VI-NEXT: .p2align 6
; VI-NEXT: .amdhsa_kernel alignstack_attr
@@ -288,6 +298,8 @@ define amdgpu_kernel void @alignstack_attr() #2 {
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
+; GFX9-NEXT: .Lfunc_end2:
+; GFX9-NEXT: .size alignstack_attr, .Lfunc_end2-alignstack_attr
; GFX9-NEXT: .section .rodata,"a"
; GFX9-NEXT: .p2align 6
; GFX9-NEXT: .amdhsa_kernel alignstack_attr
More information about the llvm-commits
mailing list