[llvm] [AMDGPU] Reflect amdgpu-waves-per-eu attribute minimum occupancy to RegPressure analysis (used in machine licm, machine scheduler, and register allocation) (PR #167390)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 10 13:02:05 PST 2025
https://github.com/hidekisaito created https://github.com/llvm/llvm-project/pull/167390
(No description was provided for this pull request.)
>From 2bc7030e24f6a4fb06ef89e984b6d02fa36d7971 Mon Sep 17 00:00:00 2001
From: Hideki Saito <hidekido at amd.com>
Date: Mon, 10 Nov 2025 14:49:41 -0600
Subject: [PATCH] [AMDGPU] Reflect amdgpu-waves-per-eu attribute minimum
occupancy to RegPressure analysis (used in machine licm, machine scheduler,
and register allocation)
---
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 5 ++
.../AMDGPU/agpr-copy-no-free-registers.ll | 70 +++++++------------
2 files changed, 32 insertions(+), 43 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index a6c1af24e13e9..ed04d6bf713c7 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -3758,6 +3758,11 @@ bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const {
unsigned MinOcc = ST.getOccupancyWithWorkGroupSizes(MF).first;
+ Function &F = MF.getFunction();
+ if (AMDGPU::getIntegerPairAttribute(F, "amdgpu-waves-per-eu", true) !=
+ std::nullopt) {
+ MinOcc = ST.getWavesPerEU(F).first;
+ }
switch (RC->getID()) {
default:
return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
index ebbeab94066d6..b34f17e28afb2 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
@@ -375,64 +375,48 @@ define void @v32_asm_def_use(float %v0, float %v1) #4 {
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; def v[0:31] a[0:15]
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_accvgpr_read_b32 v35, a15
-; GFX908-NEXT: ;;#ASMSTART
-; GFX908-NEXT: ; def v32
-; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a31, v35
+; GFX908-NEXT: v_accvgpr_read_b32 v32, a15
; GFX908-NEXT: v_accvgpr_read_b32 v35, a14
-; GFX908-NEXT: s_nop 1
+; GFX908-NEXT: v_accvgpr_read_b32 v36, a13
+; GFX908-NEXT: v_accvgpr_write_b32 a31, v32
+; GFX908-NEXT: v_accvgpr_read_b32 v32, a12
; GFX908-NEXT: v_accvgpr_write_b32 a30, v35
-; GFX908-NEXT: v_accvgpr_read_b32 v35, a13
-; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a29, v35
-; GFX908-NEXT: v_accvgpr_read_b32 v35, a12
-; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a28, v35
+; GFX908-NEXT: v_accvgpr_write_b32 a29, v36
+; GFX908-NEXT: v_accvgpr_write_b32 a28, v32
; GFX908-NEXT: v_accvgpr_read_b32 v35, a11
-; GFX908-NEXT: s_nop 1
+; GFX908-NEXT: v_accvgpr_read_b32 v36, a10
+; GFX908-NEXT: v_accvgpr_read_b32 v32, a9
; GFX908-NEXT: v_accvgpr_write_b32 a27, v35
-; GFX908-NEXT: v_accvgpr_read_b32 v35, a10
-; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a26, v35
-; GFX908-NEXT: v_accvgpr_read_b32 v35, a9
-; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a25, v35
+; GFX908-NEXT: v_accvgpr_write_b32 a26, v36
+; GFX908-NEXT: v_accvgpr_write_b32 a25, v32
; GFX908-NEXT: v_accvgpr_read_b32 v35, a8
-; GFX908-NEXT: s_nop 1
+; GFX908-NEXT: v_accvgpr_read_b32 v36, a7
+; GFX908-NEXT: v_accvgpr_read_b32 v32, a6
; GFX908-NEXT: v_accvgpr_write_b32 a24, v35
-; GFX908-NEXT: v_accvgpr_read_b32 v35, a7
-; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a23, v35
-; GFX908-NEXT: v_accvgpr_read_b32 v35, a6
-; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a22, v35
+; GFX908-NEXT: v_accvgpr_write_b32 a23, v36
+; GFX908-NEXT: v_accvgpr_write_b32 a22, v32
; GFX908-NEXT: v_accvgpr_read_b32 v35, a5
-; GFX908-NEXT: s_nop 1
+; GFX908-NEXT: v_accvgpr_read_b32 v36, a4
+; GFX908-NEXT: v_accvgpr_read_b32 v32, a3
; GFX908-NEXT: v_accvgpr_write_b32 a21, v35
-; GFX908-NEXT: v_accvgpr_read_b32 v35, a4
-; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a20, v35
-; GFX908-NEXT: v_accvgpr_read_b32 v35, a3
-; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a19, v35
+; GFX908-NEXT: v_accvgpr_write_b32 a20, v36
+; GFX908-NEXT: v_accvgpr_write_b32 a19, v32
; GFX908-NEXT: v_accvgpr_read_b32 v35, a2
-; GFX908-NEXT: s_nop 1
+; GFX908-NEXT: v_accvgpr_read_b32 v36, a1
+; GFX908-NEXT: v_accvgpr_read_b32 v32, a0
; GFX908-NEXT: v_accvgpr_write_b32 a18, v35
-; GFX908-NEXT: v_accvgpr_read_b32 v35, a1
-; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a17, v35
-; GFX908-NEXT: v_accvgpr_read_b32 v35, a0
-; GFX908-NEXT: s_nop 1
-; GFX908-NEXT: v_accvgpr_write_b32 a16, v35
+; GFX908-NEXT: v_accvgpr_write_b32 a17, v36
+; GFX908-NEXT: v_accvgpr_write_b32 a16, v32
+; GFX908-NEXT: ;;#ASMSTART
+; GFX908-NEXT: ; def v32
+; GFX908-NEXT: ;;#ASMEND
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; copy
; GFX908-NEXT: ;;#ASMEND
-; GFX908-NEXT: v_accvgpr_read_b32 v35, a1
+; GFX908-NEXT: v_accvgpr_read_b32 v37, a1
; GFX908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v34, v33, a[16:31]
; GFX908-NEXT: s_nop 0
-; GFX908-NEXT: v_accvgpr_write_b32 a32, v35
+; GFX908-NEXT: v_accvgpr_write_b32 a32, v37
; GFX908-NEXT: ;;#ASMSTART
; GFX908-NEXT: ; copy
; GFX908-NEXT: ;;#ASMEND
More information about the llvm-commits mailing list