[llvm] [AMDGPU] Fix .Lfunc_end label placement (PR #127549)

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 17 15:01:44 PST 2025


https://github.com/rampitec created https://github.com/llvm/llvm-project/pull/127549

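Currently the .Lfunc_end label is placed after the kernel descriptor,
even though the current section at that point is .rodata, which is wrong.
Emitting the label right after the function body instead allows proper
code size calculation in MC.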

>From cf668cca6d8499ad0dcf398a2a9c4f21e0c24d35 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Mon, 17 Feb 2025 14:57:47 -0800
Subject: [PATCH] [AMDGPU] Fix .Lfunc_end label placement

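Currently the .Lfunc_end label is placed after the kernel descriptor,
even though the current section at that point is .rodata, which is wrong.
Emitting the label right after the function body instead allows proper
code size calculation in MC.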
---
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp   | 26 ++++++++++++++++---
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h     |  2 +-
 llvm/test/CodeGen/AMDGPU/hsa.ll               |  9 ++++---
 .../CodeGen/AMDGPU/stack-realign-kernel.ll    | 12 +++++++++
 4 files changed, 42 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 031d8f0560ff2..950e9d125763e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -33,6 +33,7 @@
 #include "Utils/SIDefinesUtils.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/BinaryFormat/ELF.h"
+#include "llvm/CodeGen/AsmPrinterHandler.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
@@ -90,6 +91,24 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmPrinter() {
                                      createAMDGPUAsmPrinterPass);
 }
 
+namespace {
+class AMDGPUAsmPrinterHandler : public AsmPrinterHandler {
+protected:
+  AMDGPUAsmPrinter *Asm;
+
+public:
+  AMDGPUAsmPrinterHandler(AMDGPUAsmPrinter *A) : Asm(A) {}
+
+  virtual void beginFunction(const MachineFunction *MF) override {}
+
+  virtual void endFunction(const MachineFunction *MF) override {
+    Asm->endFunction(MF);
+  }
+
+  virtual void endModule() override {}
+};
+} // End anonymous namespace
+
 AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
                                    std::unique_ptr<MCStreamer> Streamer)
     : AsmPrinter(TM, std::move(Streamer)) {
@@ -209,13 +228,12 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
     HSAMetadataStream->emitKernel(*MF, CurrentProgramInfo);
 }
 
-void AMDGPUAsmPrinter::emitFunctionBodyEnd() {
+void AMDGPUAsmPrinter::endFunction(const MachineFunction *MF) {
   const SIMachineFunctionInfo &MFI = *MF->getInfo<SIMachineFunctionInfo>();
   if (!MFI.isEntryFunction())
     return;
 
-  if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
-    return;
+  assert(TM.getTargetTriple().getOS() == Triple::AMDHSA);
 
   auto &Streamer = getTargetStreamer()->getStreamer();
   auto &Context = Streamer.getContext();
@@ -351,6 +369,8 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
     default:
       report_fatal_error("Unexpected code object version");
     }
+
+    addAsmPrinterHandler(std::make_unique<AMDGPUAsmPrinterHandler>(this));
   }
 
   return AsmPrinter::doInitialization(M);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index cc8c4411805e2..3a0da0dc33d9d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -120,7 +120,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
 
   void emitFunctionBodyStart() override;
 
-  void emitFunctionBodyEnd() override;
+  void endFunction(const MachineFunction *MF);
 
   void emitImplicitDef(const MachineInstr *MI) const override;
 
diff --git a/llvm/test/CodeGen/AMDGPU/hsa.ll b/llvm/test/CodeGen/AMDGPU/hsa.ll
index 37476203fbfad..6e5f16feb0773 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa.ll
@@ -96,6 +96,12 @@
 ; PRE-GFX10: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
 ; GFX10: global_store_{{dword|b32}} v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off
 
+; HSA: s_endpgm
+; HSA-NEXT: .Lfunc_end0:
+; HSA-NEXT: .size   simple, .Lfunc_end0-simple
+
+; HSA: .section .rodata,"a",@progbits
+
 ; HSA: .amdhsa_user_sgpr_private_segment_buffer 1
 ; HSA: .amdhsa_user_sgpr_kernarg_segment_ptr 1
 
@@ -103,9 +109,6 @@
 ; GFX10-W32: .amdhsa_wavefront_size32 1
 ; GFX10-W64: .amdhsa_wavefront_size32 0
 
-; HSA: .Lfunc_end0:
-; HSA: .size   simple, .Lfunc_end0-simple
-
 define amdgpu_kernel void @simple(ptr addrspace(1) %out) #0 {
 entry:
   store i32 0, ptr addrspace(1) %out
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
index 6ddf0986755f9..2d34169f9f34c 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
@@ -15,6 +15,8 @@ define amdgpu_kernel void @max_alignment_128() #0 {
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:128
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    s_endpgm
+; VI-NEXT:    .Lfunc_end0:
+; VI-NEXT:    .size max_alignment_128, .Lfunc_end0-max_alignment_128
 ; VI-NEXT:    .section .rodata,"a"
 ; VI-NEXT:    .p2align 6
 ; VI-NEXT:    .amdhsa_kernel max_alignment_128
@@ -66,6 +68,8 @@ define amdgpu_kernel void @max_alignment_128() #0 {
 ; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:128
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_endpgm
+; GFX9-NEXT:    .Lfunc_end0:
+; GFX9-NEXT:    .size max_alignment_128, .Lfunc_end0-max_alignment_128
 ; GFX9-NEXT:    .section .rodata,"a"
 ; GFX9-NEXT:    .p2align 6
 ; GFX9-NEXT:    .amdhsa_kernel max_alignment_128
@@ -126,6 +130,8 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    s_endpgm
+; VI-NEXT:    .Lfunc_end1:
+; VI-NEXT:    .size stackrealign_attr, .Lfunc_end1-stackrealign_attr
 ; VI-NEXT:    .section .rodata,"a"
 ; VI-NEXT:    .p2align 6
 ; VI-NEXT:    .amdhsa_kernel stackrealign_attr
@@ -177,6 +183,8 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
 ; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_endpgm
+; GFX9-NEXT:    .Lfunc_end1:
+; GFX9-NEXT:    .size stackrealign_attr, .Lfunc_end1-stackrealign_attr
 ; GFX9-NEXT:    .section .rodata,"a"
 ; GFX9-NEXT:    .p2align 6
 ; GFX9-NEXT:    .amdhsa_kernel stackrealign_attr
@@ -237,6 +245,8 @@ define amdgpu_kernel void @alignstack_attr() #2 {
 ; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
 ; VI-NEXT:    s_waitcnt vmcnt(0)
 ; VI-NEXT:    s_endpgm
+; VI-NEXT:    .Lfunc_end2:
+; VI-NEXT:    .size alignstack_attr, .Lfunc_end2-alignstack_attr
 ; VI-NEXT:    .section .rodata,"a"
 ; VI-NEXT:    .p2align 6
 ; VI-NEXT:    .amdhsa_kernel alignstack_attr
@@ -288,6 +298,8 @@ define amdgpu_kernel void @alignstack_attr() #2 {
 ; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    s_endpgm
+; GFX9-NEXT:    .Lfunc_end2:
+; GFX9-NEXT:    .size alignstack_attr, .Lfunc_end2-alignstack_attr
 ; GFX9-NEXT:    .section .rodata,"a"
 ; GFX9-NEXT:    .p2align 6
 ; GFX9-NEXT:    .amdhsa_kernel alignstack_attr



More information about the llvm-commits mailing list