[llvm] [AMDGPU] Hoist s_set_vgpr_msb past SALU control instructions (PR #172108)

Jeffrey Byrnes via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 12 15:57:41 PST 2025


https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/172108

>From b2495de3a652040528398c617e7f27b4ebc1f7ec Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 12 Dec 2025 15:28:09 -0800
Subject: [PATCH 1/3] [AMDGPU] Hoist s_set_vgpr_msb past control instructions

Change-Id: I5cac88448bccfaa903fa1d20ef9bbb3310e9e5ae
---
 .../Target/AMDGPU/AMDGPULowerVGPREncoding.cpp | 33 ++++++++++
 llvm/lib/Target/AMDGPU/SIInstrInfo.h          |  6 ++
 .../CodeGen/AMDGPU/vgpr-set-msb-coissue.mir   | 64 +++++++++++++++++++
 3 files changed, 103 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index d7d0292083e1c..a36f6dd90aae8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -137,6 +137,12 @@ class AMDGPULowerVGPREncoding {
   /// instruction to extend it or drop the clause if it cannot be adjusted.
   MachineBasicBlock::instr_iterator
   handleClause(MachineBasicBlock::instr_iterator I);
+
+  /// Check if an instruction \p I is immediately after another control
+  /// instruction which it cannot coissue with. If so, insert before that
+  /// instruction to encourage more coissuing.
+  MachineBasicBlock::instr_iterator
+  handleCoissue(MachineBasicBlock::instr_iterator I);
 };
 
 bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
@@ -167,6 +173,7 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
   int64_t OldModeBits = CurrentMode << ModeWidth;
 
   I = handleClause(I);
+  I = handleCoissue(I);
   MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
                           .addImm(NewMode | OldModeBits);
 
@@ -283,6 +290,32 @@ AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
   return I;
 }
 
+MachineBasicBlock::instr_iterator
+AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
+  // return I;
+  if (I.isEnd())
+    return I;
+
+  if (I == I->getParent()->begin())
+    return I;
+
+  MachineBasicBlock::instr_iterator Prev = std::prev(I);
+  auto isControl = [this](MachineInstr *MI) {
+    return TII->isBarrier(MI->getOpcode()) ||
+           TII->isWaitcnt(MI || (SIInstrInfo::isControlInstr(*MI) &&
+                                 MI->getOpcode() != AMDGPU::S_SET_VGPR_MSB));
+  };
+
+  if (!isControl(&*Prev))
+    return I;
+
+  while (!Prev.isEnd() && (Prev != Prev->getParent()->begin()) &&
+         isControl(&*Prev)) {
+    --Prev;
+  }
+  return Prev;
+}
+
 bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   if (!ST.has1024AddressableVGPRs())
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index c66985a19685b..e2276eef7ab10 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -457,6 +457,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     return get(Opcode).TSFlags & SIInstrFlags::SALU;
   }
 
+  static bool isControlInstr(const MachineInstr &MI) {
+    return MI.getOpcode() == AMDGPU::S_DELAY_ALU ||
+           MI.getOpcode() == AMDGPU::S_SET_VGPR_MSB ||
+           MI.getOpcode() == AMDGPU::ATOMIC_FENCE;
+  }
+
   static bool isVALU(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::VALU;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir b/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
new file mode 100644
index 0000000000000..cf3ec3686b240
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
@@ -0,0 +1,64 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s
+
+---
+name:            multi
+tracksRegLiveness: true
+body:             |
+  bb.0:
+  liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-LABEL: name: multi
+    ; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+    ; CHECK-NEXT: S_WAIT_DSCNT 0
+    ; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
+    ; CHECK-NEXT: S_BARRIER_WAIT -1
+    ; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+  $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+  S_WAIT_DSCNT 0
+  S_BARRIER_SIGNAL_IMM -1
+  S_BARRIER_WAIT -1
+  $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...
+
+---
+name:            high_vgprs
+tracksRegLiveness: true
+body:             |
+  bb.0:
+  liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-LABEL: name: high_vgprs
+    ; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+    ; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
+    ; CHECK-NEXT: S_BARRIER_WAIT -1
+    ; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+  S_BARRIER_SIGNAL_IMM -1
+  S_BARRIER_WAIT -1
+  $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...
+
+---
+name:            no_control
+tracksRegLiveness: true
+body:             |
+  bb.0:
+  liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-LABEL: name: no_control
+    ; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+    ; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+  $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+  $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...

>From ceab7e7eec6c1267ad283dbd971ff591626d54da Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 12 Dec 2025 15:41:21 -0800
Subject: [PATCH 2/3] Naming + commented code

Change-Id: I6729ce873627b13fd112601041901bd7b984b525
---
 llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index a36f6dd90aae8..04027c308a4b7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -292,7 +292,6 @@ AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
 
 MachineBasicBlock::instr_iterator
 AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
-  // return I;
   if (I.isEnd())
     return I;
 
@@ -300,17 +299,17 @@ AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
     return I;
 
   MachineBasicBlock::instr_iterator Prev = std::prev(I);
-  auto isControl = [this](MachineInstr *MI) {
+  auto isControlSALU = [this](MachineInstr *MI) {
     return TII->isBarrier(MI->getOpcode()) ||
            TII->isWaitcnt(MI || (SIInstrInfo::isControlInstr(*MI) &&
                                  MI->getOpcode() != AMDGPU::S_SET_VGPR_MSB));
   };
 
-  if (!isControl(&*Prev))
+  if (!isControlSALU(&*Prev))
     return I;
 
   while (!Prev.isEnd() && (Prev != Prev->getParent()->begin()) &&
-         isControl(&*Prev)) {
+         isControlSALU(&*Prev)) {
     --Prev;
   }
   return Prev;

>From 3e2dd1aab7228f19359a417fbf25afe49a8eb12a Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 12 Dec 2025 15:56:57 -0800
Subject: [PATCH 3/3] Do not use word control

Change-Id: I548cec65958ed0f48ae4f1d8012288d121a5491f
---
 llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp | 10 +++++-----
 llvm/lib/Target/AMDGPU/SIInstrInfo.h               |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index 04027c308a4b7..86f979b0e9aaa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -138,7 +138,7 @@ class AMDGPULowerVGPREncoding {
   MachineBasicBlock::instr_iterator
   handleClause(MachineBasicBlock::instr_iterator I);
 
-  /// Check if an instruction \p I is immediately after another control
+  /// Check if an instruction \p I is immediately after another program state
   /// instruction which it cannot coissue with. If so, insert before that
   /// instruction to encourage more coissuing.
   MachineBasicBlock::instr_iterator
@@ -299,17 +299,17 @@ AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
     return I;
 
   MachineBasicBlock::instr_iterator Prev = std::prev(I);
-  auto isControlSALU = [this](MachineInstr *MI) {
+  auto isProgramStatetSALU = [this](MachineInstr *MI) {
     return TII->isBarrier(MI->getOpcode()) ||
-           TII->isWaitcnt(MI || (SIInstrInfo::isControlInstr(*MI) &&
+           TII->isWaitcnt(MI || (SIInstrInfo::isProgramStatetSALU(*MI) &&
                                  MI->getOpcode() != AMDGPU::S_SET_VGPR_MSB));
   };
 
-  if (!isControlSALU(&*Prev))
+  if (!isProgramStatetSALU(&*Prev))
     return I;
 
   while (!Prev.isEnd() && (Prev != Prev->getParent()->begin()) &&
-         isControlSALU(&*Prev)) {
+         isProgramStatetSALU(&*Prev)) {
     --Prev;
   }
   return Prev;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index e2276eef7ab10..ff1f0a8ec4d81 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -457,7 +457,7 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     return get(Opcode).TSFlags & SIInstrFlags::SALU;
   }
 
-  static bool isControlInstr(const MachineInstr &MI) {
+  static bool isProgramStatetSALU(const MachineInstr &MI) {
     return MI.getOpcode() == AMDGPU::S_DELAY_ALU ||
            MI.getOpcode() == AMDGPU::S_SET_VGPR_MSB ||
            MI.getOpcode() == AMDGPU::ATOMIC_FENCE;



More information about the llvm-commits mailing list