[llvm] 8529bd7 - [AMDGPU] Respect MBB alignment in the getFunctionCodeSize() (#127142)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 18 13:19:37 PST 2025

Author: Stanislav Mekhanoshin
Date: 2025-02-18T13:19:33-08:00
New Revision: 8529bd7b964cc9fafe8fece84f7bd12dacb09560

URL: https://github.com/llvm/llvm-project/commit/8529bd7b964cc9fafe8fece84f7bd12dacb09560
DIFF: https://github.com/llvm/llvm-project/commit/8529bd7b964cc9fafe8fece84f7bd12dacb09560.diff

LOG: [AMDGPU] Respect MBB alignment in the getFunctionCodeSize() (#127142)




diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
index 1123696509818..b4d740422b94a 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
@@ -212,6 +212,12 @@ uint64_t SIProgramInfo::getFunctionCodeSize(const MachineFunction &MF) {
   uint64_t CodeSize = 0;
   for (const MachineBasicBlock &MBB : MF) {
+    // The amount of padding to align code can be both underestimated and
+    // overestimated. In case of inline asm used getInstSizeInBytes() will
+    // return a maximum size of a single instruction, where the real size may
+    // differ. At this point CodeSize may be already off.
+    CodeSize = alignTo(CodeSize, MBB.getAlignment());
     for (const MachineInstr &MI : MBB) {
       // TODO: CodeSize should account for multiple functions.

diff --git a/llvm/test/CodeGen/AMDGPU/code-size-estimate.mir b/llvm/test/CodeGen/AMDGPU/code-size-estimate.mir
index 76eaf350301e4..9ae536af6f0e9 100644
--- a/llvm/test/CodeGen/AMDGPU/code-size-estimate.mir
+++ b/llvm/test/CodeGen/AMDGPU/code-size-estimate.mir
@@ -31,3 +31,92 @@ body:             |
+# CHECK: align4:                                 ; @align4
+# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+# CHECK: s_cbranch_scc1 .LBB{{[0-9_]+}}          ; encoding: [A,A,0x85,0xbf]
+# CHECK: s_barrier                               ; encoding: [0x00,0x00,0x8a,0xbf]
+# CHECK: .p2align        2
+# CHECK: s_endpgm                                ; encoding: [0x00,0x00,0x81,0xbf]
+# CHECK: ; codeLenInByte = 16
+name:            align4
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $scc = IMPLICIT_DEF
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+  bb.1:
+  bb.2 (align 4):
+    S_ENDPGM 0
+# CHECK: align8:                                 ; @align8
+# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+# CHECK: s_cbranch_scc1 .LBB{{[0-9_]+}}          ; encoding: [A,A,0x85,0xbf]
+# CHECK: s_barrier                               ; encoding: [0x00,0x00,0x8a,0xbf]
+# CHECK: .p2align        3
+# CHECK: s_endpgm                                ; encoding: [0x00,0x00,0x81,0xbf]
+# CHECK: ; codeLenInByte = 20
+name:            align8
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $scc = IMPLICIT_DEF
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+  bb.1:
+  bb.2 (align 8):
+    S_ENDPGM 0
+# CHECK: align16:                                ; @align16
+# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+# CHECK: s_cbranch_scc1 .LBB{{[0-9_]+}}          ; encoding: [A,A,0x85,0xbf]
+# CHECK: s_barrier                               ; encoding: [0x00,0x00,0x8a,0xbf]
+# CHECK: .p2align        4
+# CHECK: s_endpgm                                ; encoding: [0x00,0x00,0x81,0xbf]
+# CHECK: ; codeLenInByte = 20
+name:            align16
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $scc = IMPLICIT_DEF
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+  bb.1:
+  bb.2 (align 16):
+    S_ENDPGM 0
+# CHECK: align32:                                ; @align32
+# CHECK: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+# CHECK: s_cbranch_scc1 .LBB{{[0-9_]+}}          ; encoding: [A,A,0x85,0xbf]
+# CHECK: s_barrier                               ; encoding: [0x00,0x00,0x8a,0xbf]
+# CHECK: .p2align        5
+# CHECK: s_endpgm                                ; encoding: [0x00,0x00,0x81,0xbf]
+# CHECK: ; codeLenInByte = 36
+name:            align32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    $scc = IMPLICIT_DEF
+    S_CBRANCH_SCC1 %bb.2, implicit $scc
+  bb.1:
+  bb.2 (align 32):
+    S_ENDPGM 0


More information about the llvm-commits mailing list