[llvm] [AMDGPU] Switch to MF.estimateFunctionSizeInBytes() (PR #127246)

Tue Feb 18 13:44:48 PST 2025

https://github.com/rampitec updated https://github.com/llvm/llvm-project/pull/127246

>From a35a50c531245e52a779f4c1cfda3b2372ed1528 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Fri, 14 Feb 2025 11:18:49 -0800
Subject: [PATCH] [AMDGPU] Switch to MF.estimateFunctionSizeInBytes()

Both methods are equally inaccurate; we need to switch to MCExpr
for better results in the future.
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  2 +-
 llvm/lib/Target/AMDGPU/SIProgramInfo.cpp      | 31 +++----------------
 llvm/lib/Target/AMDGPU/SIProgramInfo.h        |  2 +-
 .../CodeGen/AMDGPU/code-size-estimate.mir     |  6 ++--
 4 files changed, 9 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ceab6c9dcca34..9bf95da53ae5f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -8977,7 +8977,7 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo(), &ST);
   }
   default:
-    if (MI.isMetaInstruction())
+    if (MI.isMetaInstruction() || MI.isDebugInstr())
       return 0;
     return DescSize;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
index b4d740422b94a..7169eebf907ca 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
@@ -202,32 +202,9 @@ const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC,
   return MCConstantExpr::create(0, Ctx);
 }
 
-uint64_t SIProgramInfo::getFunctionCodeSize(const MachineFunction &MF) {
-  if (CodeSizeInBytes.has_value())
-    return *CodeSizeInBytes;
+uint64_t SIProgramInfo::getFunctionCodeSize(MachineFunction &MF) {
+  if (!CodeSizeInBytes.has_value())
+    CodeSizeInBytes = MF.estimateFunctionSizeInBytes();
 
-  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
-  const SIInstrInfo *TII = STM.getInstrInfo();
-
-  uint64_t CodeSize = 0;
-
-  for (const MachineBasicBlock &MBB : MF) {
-    // The amount of padding to align code can be both underestimated and
-    // overestimated. In case of inline asm used getInstSizeInBytes() will
-    // return a maximum size of a single instruction, where the real size may
-    // differ. At this point CodeSize may be already off.
-    CodeSize = alignTo(CodeSize, MBB.getAlignment());
-
-    for (const MachineInstr &MI : MBB) {
-      // TODO: CodeSize should account for multiple functions.
-
-      if (MI.isMetaInstruction())
-        continue;
-
-      CodeSize += TII->getInstSizeInBytes(MI);
-    }
-  }
-
-  CodeSizeInBytes = CodeSize;
-  return CodeSize;
+  return *CodeSizeInBytes;
 }
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
index d7087436ae758..65f8bee1c5118 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -101,7 +101,7 @@ struct LLVM_EXTERNAL_VISIBILITY SIProgramInfo {
   void reset(const MachineFunction &MF);
 
   // Get function code size and cache the value.
-  uint64_t getFunctionCodeSize(const MachineFunction &MF);
+  uint64_t getFunctionCodeSize(MachineFunction &MF);
 
   /// Compute the value of the ComputePGMRsrc1 register.
   const MCExpr *getComputePGMRSrc1(const GCNSubtarget &ST,
diff --git a/llvm/test/CodeGen/AMDGPU/code-size-estimate.mir b/llvm/test/CodeGen/AMDGPU/code-size-estimate.mir
index 9ae536af6f0e9..f71160b557650 100644
--- a/llvm/test/CodeGen/AMDGPU/code-size-estimate.mir
+++ b/llvm/test/CodeGen/AMDGPU/code-size-estimate.mir
@@ -61,7 +61,7 @@ body:             |
 # CHECK: s_barrier                               ; encoding: [0x00,0x00,0x8a,0xbf]
 # CHECK: .p2align        3
 # CHECK: s_endpgm                                ; encoding: [0x00,0x00,0x81,0xbf]
-# CHECK: ; codeLenInByte = 20
+# CHECK: ; codeLenInByte = 24
 ---
 name:            align8
 tracksRegLiveness: true
@@ -83,7 +83,7 @@ body:             |
 # CHECK: s_barrier                               ; encoding: [0x00,0x00,0x8a,0xbf]
 # CHECK: .p2align        4
 # CHECK: s_endpgm                                ; encoding: [0x00,0x00,0x81,0xbf]
-# CHECK: ; codeLenInByte = 20
+# CHECK: ; codeLenInByte = 32
 ---
 name:            align16
 tracksRegLiveness: true
@@ -105,7 +105,7 @@ body:             |
 # CHECK: s_barrier                               ; encoding: [0x00,0x00,0x8a,0xbf]
 # CHECK: .p2align        5
 # CHECK: s_endpgm                                ; encoding: [0x00,0x00,0x81,0xbf]
-# CHECK: ; codeLenInByte = 36
+# CHECK: ; codeLenInByte = 64
 ---
 name:            align32
 tracksRegLiveness: true