[llvm-branch-commits] [llvm] [AMDGPU] Commute instructions to avoid VGPR MSB changes (PR #181918)
Stanislav Mekhanoshin via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Feb 17 13:57:27 PST 2026
https://github.com/rampitec created https://github.com/llvm/llvm-project/pull/181918
None
>From b52fdb78ff3b9ba2e48ac772e49a14bcaaccebec Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin at amd.com>
Date: Tue, 17 Feb 2026 13:56:12 -0800
Subject: [PATCH] [AMDGPU] Commute instructions to avoid VGPR MSB changes
---
.../Target/AMDGPU/AMDGPULowerVGPREncoding.cpp | 15 ++++++++
.../CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir | 37 ++++++++++---------
2 files changed, 35 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index 4c8b91da765f7..a06f80b0bf143 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -97,6 +97,12 @@ class AMDGPULowerVGPREncoding {
V |= Op.MSBits.value_or(0) << (I * 2);
return V;
}
+
+ /// Equality of two modes, defined as equality of the values returned by
+ /// encode() for each of them.
+ /// \param Other the mode to compare against; taken by const reference so
+ /// that a comparison does not copy the mode.
+ /// \returns true if both modes encode to the same value.
+ bool operator==(const ModeTy &Other) const {
+ return encode() == Other.encode();
+ }
+
+ /// Inequality of two modes; the exact negation of operator==.
+ /// \param Other the mode to compare against; taken by const reference so
+ /// that a comparison does not copy the mode.
+ bool operator!=(const ModeTy &Other) const { return !(*this == Other); }
};
public:
@@ -276,6 +282,15 @@ bool AMDGPULowerVGPREncoding::runOnMachineInstr(MachineInstr &MI) {
if (Ops.first) {
ModeTy NewMode;
computeMode(NewMode, MI, Ops.first, Ops.second);
+ if (NewMode != CurrentMode && MI.isCommutable() &&
+ TII->commuteInstruction(MI)) {
+ ModeTy NewModeCommuted;
+ computeMode(NewModeCommuted, MI, Ops.first, Ops.second);
+ if (NewModeCommuted == CurrentMode)
+ return false;
+ // Commute back.
+ TII->commuteInstruction(MI);
+ }
return setMode(NewMode, MI.getIterator());
}
assert(!TII->hasVGPRUses(MI) || MI.isMetaInstruction() || MI.isPseudo());
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
index 7a03abe9b217e..63fe1cf567ba6 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
@@ -194,12 +194,11 @@ body: |
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v0 /*v768*/, v2 /*v514*/, v3 /*v515*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr768, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xabae
- ; ASM-SAME: ; msbs: dst=2 src0=2 src1=3 src2=2
- ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v770*/, v3 /*v515*/
+ ; Commute FMA to avoid mode change
+ ; GCN-NEXT: v_fma_f32 v0 /*v512*/, v2 /*v770*/, v1 /*v513*/, v3 /*v515*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr770, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xaeba
+ ; GCN-NEXT: s_set_vgpr_msb 0xabba
; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=3
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v771*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr771, 0, 0, implicit $exec, implicit $mode
@@ -343,18 +342,15 @@ body: |
; GCN-NEXT: v_fmaak_f32 v0, v1 /*v257*/, v2 /*v258*/, 0x1
$vgpr0 = V_FMAAK_F32 undef $vgpr257, undef $vgpr258, 1, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x541
- ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=0
+ ; GCN-NEXT: s_set_vgpr_msb 0x551
+ ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=1
; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1 /*v257*/, v2, 0x1
$vgpr256 = V_FMAAK_F32 undef $vgpr257, undef $vgpr2, 1, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x4144
- ; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0
- ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v1, v2 /*v258*/, 0x1
+ ; Commute FMAAK to avoid mode change
+ ; GCN-NEXT: v_fmaak_f32 v0 /*v256*/, v2 /*v258*/, v1, 0x1
$vgpr256 = V_FMAAK_F32 undef $vgpr1, undef $vgpr258, 1, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x4451
- ; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=1
; GCN-NEXT: v_fmamk_f32 v0 /*v256*/, v1 /*v257*/, 0x1, v2 /*v258*/
$vgpr256 = V_FMAMK_F32 undef $vgpr257, 1, undef $vgpr258, implicit $exec, implicit $mode
@@ -456,17 +452,24 @@ body: |
; GCN-NEXT: v_lshlrev_b32_e64 v0, v1, v0 /*v256*/
$vgpr0 = V_LSHLREV_B32_e64 undef $vgpr1, undef $vgpr256, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x401
- ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0
- ; GCN-NEXT: v_subrev_nc_u32_e32 v0, v0 /*v256*/, v2
+ ; Commute SUBREV to avoid mode change
+ ; GCN-NEXT: v_sub_nc_u32_e32 v0, v2, v0 /*v256*/
$vgpr0 = V_SUBREV_U32_e32 undef $vgpr256, undef $vgpr2, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x104
- ; ASM-SAME: ; msbs: dst=0 src0=0 src1=1 src2=0
; GCN-NEXT: v_subrev_nc_u32_e32 v0, v1, v0 /*v256*/
$vgpr0 = V_SUBREV_U32_e32 undef $vgpr1, undef $vgpr256, implicit $exec
- ; ASM: NumVgprs: 257
+ ; GCN-NEXT: s_set_vgpr_msb 0x406
+ ; ASM-SAME: ; msbs: dst=0 src0=2 src1=1 src2=0
+ ; GCN-NEXT: v_subrev_nc_u32_e32 v0, v0 /*v512*/, v0 /*v256*/
+ $vgpr0 = V_SUBREV_U32_e32 undef $vgpr512, undef $vgpr256, implicit $exec
+
+ ; GCN-NEXT: s_set_vgpr_msb 0x601
+ ; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0
+ ; GCN-NEXT: v_subrev_nc_u32_e32 v0, v0 /*v256*/, v2
+ $vgpr0 = V_SUBREV_U32_e32 undef $vgpr256, undef $vgpr2, implicit $exec
+
+ ; ASM: NumVgprs: 513
...
# ASM-LABEL: {{^}}minimal_mode_change:
More information about the llvm-branch-commits
mailing list