[llvm] [AMDGPU] Hoist s_set_vgpr_msb past SALU control instructions (PR #172108)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 12 15:57:41 PST 2025
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/172108
>From b2495de3a652040528398c617e7f27b4ebc1f7ec Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 12 Dec 2025 15:28:09 -0800
Subject: [PATCH 1/3] [AMDGPU] Hoist s_set_vgpr_msb past control instructions
Change-Id: I5cac88448bccfaa903fa1d20ef9bbb3310e9e5ae
---
.../Target/AMDGPU/AMDGPULowerVGPREncoding.cpp | 33 ++++++++++
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 6 ++
.../CodeGen/AMDGPU/vgpr-set-msb-coissue.mir | 64 +++++++++++++++++++
3 files changed, 103 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index d7d0292083e1c..a36f6dd90aae8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -137,6 +137,12 @@ class AMDGPULowerVGPREncoding {
/// instruction to extend it or drop the clause if it cannot be adjusted.
MachineBasicBlock::instr_iterator
handleClause(MachineBasicBlock::instr_iterator I);
+
+ /// Check if an instruction \p I is immediately after another control
+ /// instruction which it cannot coissue with. If so, insert before that
+ /// instruction to encourage more coissuing.
+ MachineBasicBlock::instr_iterator
+ handleCoissue(MachineBasicBlock::instr_iterator I);
};
bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
@@ -167,6 +173,7 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
int64_t OldModeBits = CurrentMode << ModeWidth;
I = handleClause(I);
+ I = handleCoissue(I);
MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
.addImm(NewMode | OldModeBits);
@@ -283,6 +290,32 @@ AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
return I;
}
+/// Choose where to insert the S_SET_VGPR_MSB that is required before \p I.
+///
+/// If \p I is directly preceded by a run of barrier, waitcnt or other
+/// program-state SALU instructions, return an iterator to the first
+/// instruction of that run so the mode change is emitted ahead of it.
+/// Otherwise return \p I unchanged. The result never moves above an
+/// instruction that is not part of the run, so every instruction in front of
+/// the run is still encoded under the mode that was current for it.
+MachineBasicBlock::instr_iterator
+AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
+  if (I.isEnd())
+    return I;
+
+  const MachineBasicBlock::instr_iterator Begin =
+      I->getParent()->instr_begin();
+
+  // S_SET_VGPR_MSB itself is excluded, so the walk stops at an existing mode
+  // change instead of hoisting above it.
+  auto IsHoistable = [this](const MachineInstr &MI) {
+    const unsigned Opc = MI.getOpcode();
+    return TII->isBarrier(Opc) || TII->isWaitcnt(Opc) ||
+           (SIInstrInfo::isControlInstr(MI) &&
+            Opc != AMDGPU::S_SET_VGPR_MSB);
+  };
+
+  // Only step back while the instruction immediately above the insertion
+  // point belongs to the run; this leaves InsertPt on the first instruction
+  // of the run (or on I when there is no run, or I is first in the block).
+  MachineBasicBlock::instr_iterator InsertPt = I;
+  while (InsertPt != Begin && IsHoistable(*std::prev(InsertPt)))
+    --InsertPt;
+  return InsertPt;
+}
+
+
bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
if (!ST.has1024AddressableVGPRs())
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index c66985a19685b..e2276eef7ab10 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -457,6 +457,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::SALU;
}
+ /// \returns true if the opcode of \p MI is S_DELAY_ALU, S_SET_VGPR_MSB or
+ /// ATOMIC_FENCE. Note that S_SET_VGPR_MSB is included here; callers that
+ /// must not match it have to exclude that opcode themselves.
+ static bool isControlInstr(const MachineInstr &MI) {
+ return MI.getOpcode() == AMDGPU::S_DELAY_ALU ||
+ MI.getOpcode() == AMDGPU::S_SET_VGPR_MSB ||
+ MI.getOpcode() == AMDGPU::ATOMIC_FENCE;
+ }
+
static bool isVALU(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::VALU;
}
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir b/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
new file mode 100644
index 0000000000000..cf3ec3686b240
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
@@ -0,0 +1,64 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s
+
+# A V_EXP_F32_e32 on $vgpr10/$vgpr11, then S_WAIT_DSCNT, S_BARRIER_SIGNAL_IMM
+# and S_BARRIER_WAIT, then a V_EXP_F32_e32 on $vgpr256/$vgpr257. The CHECK
+# lines expect S_SET_VGPR_MSB after the first V_EXP_F32_e32 and ahead of
+# S_WAIT_DSCNT, not directly before the second V_EXP_F32_e32.
+---
+name: multi
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+ ; CHECK-LABEL: name: multi
+ ; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+ ; CHECK-NEXT: S_WAIT_DSCNT 0
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
+ ; CHECK-NEXT: S_BARRIER_WAIT -1
+ ; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+ S_WAIT_DSCNT 0
+ S_BARRIER_SIGNAL_IMM -1
+ S_BARRIER_WAIT -1
+ $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+# The block begins with S_BARRIER_SIGNAL_IMM and S_BARRIER_WAIT, followed by a
+# V_EXP_F32_e32 on $vgpr256/$vgpr257. The CHECK lines expect S_SET_VGPR_MSB to
+# become the first instruction of the block, ahead of both barrier
+# instructions.
+---
+name: high_vgprs
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+ ; CHECK-LABEL: name: high_vgprs
+ ; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
+ ; CHECK-NEXT: S_BARRIER_WAIT -1
+ ; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ S_BARRIER_SIGNAL_IMM -1
+ S_BARRIER_WAIT -1
+ $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
+
+# Two back-to-back V_EXP_F32_e32 instructions with no wait or barrier between
+# them. The CHECK lines expect S_SET_VGPR_MSB directly before the second one,
+# i.e. no hoisting takes place.
+---
+name: no_control
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+ ; CHECK-LABEL: name: no_control
+ ; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+ ; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+ $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+ S_ENDPGM 0
+...
>From ceab7e7eec6c1267ad283dbd971ff591626d54da Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 12 Dec 2025 15:41:21 -0800
Subject: [PATCH 2/3] Naming + commented code
Change-Id: I6729ce873627b13fd112601041901bd7b984b525
---
llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index a36f6dd90aae8..04027c308a4b7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -292,7 +292,6 @@ AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
MachineBasicBlock::instr_iterator
AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
- // return I;
if (I.isEnd())
return I;
@@ -300,17 +299,17 @@ AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
return I;
MachineBasicBlock::instr_iterator Prev = std::prev(I);
- auto isControl = [this](MachineInstr *MI) {
+ auto isControlSALU = [this](MachineInstr *MI) {
return TII->isBarrier(MI->getOpcode()) ||
TII->isWaitcnt(MI || (SIInstrInfo::isControlInstr(*MI) &&
MI->getOpcode() != AMDGPU::S_SET_VGPR_MSB));
};
- if (!isControl(&*Prev))
+ if (!isControlSALU(&*Prev))
return I;
while (!Prev.isEnd() && (Prev != Prev->getParent()->begin()) &&
- isControl(&*Prev)) {
+ isControlSALU(&*Prev)) {
--Prev;
}
return Prev;
>From 3e2dd1aab7228f19359a417fbf25afe49a8eb12a Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 12 Dec 2025 15:56:57 -0800
Subject: [PATCH 3/3] Do not use word control
Change-Id: I548cec65958ed0f48ae4f1d8012288d121a5491f
---
llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp | 10 +++++-----
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 2 +-
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index 04027c308a4b7..86f979b0e9aaa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -138,7 +138,7 @@ class AMDGPULowerVGPREncoding {
MachineBasicBlock::instr_iterator
handleClause(MachineBasicBlock::instr_iterator I);
- /// Check if an instruction \p I is immediately after another control
+ /// Check if an instruction \p I is immediately after another program state
/// instruction which it cannot coissue with. If so, insert before that
/// instruction to encourage more coissuing.
MachineBasicBlock::instr_iterator
@@ -299,17 +299,17 @@ AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
return I;
MachineBasicBlock::instr_iterator Prev = std::prev(I);
- auto isControlSALU = [this](MachineInstr *MI) {
+ auto isProgramStatetSALU = [this](MachineInstr *MI) {
return TII->isBarrier(MI->getOpcode()) ||
- TII->isWaitcnt(MI || (SIInstrInfo::isControlInstr(*MI) &&
+ TII->isWaitcnt(MI || (SIInstrInfo::isProgramStatetSALU(*MI) &&
MI->getOpcode() != AMDGPU::S_SET_VGPR_MSB));
};
- if (!isControlSALU(&*Prev))
+ if (!isProgramStatetSALU(&*Prev))
return I;
while (!Prev.isEnd() && (Prev != Prev->getParent()->begin()) &&
- isControlSALU(&*Prev)) {
+ isProgramStatetSALU(&*Prev)) {
--Prev;
}
return Prev;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index e2276eef7ab10..ff1f0a8ec4d81 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -457,7 +457,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::SALU;
}
- static bool isControlInstr(const MachineInstr &MI) {
+ static bool isProgramStatetSALU(const MachineInstr &MI) {
return MI.getOpcode() == AMDGPU::S_DELAY_ALU ||
MI.getOpcode() == AMDGPU::S_SET_VGPR_MSB ||
MI.getOpcode() == AMDGPU::ATOMIC_FENCE;
More information about the llvm-commits
mailing list