[llvm] e45241a - [AMDGPU] Hoist s_set_vgpr_msb past SALU program state instructions (#172108)

via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 12 18:04:25 PST 2025


Author: Jeffrey Byrnes
Date: 2025-12-12T18:04:20-08:00
New Revision: e45241a4fe9ec896b92802461f6b05da3e53ec9a

URL: https://github.com/llvm/llvm-project/commit/e45241a4fe9ec896b92802461f6b05da3e53ec9a
DIFF: https://github.com/llvm/llvm-project/commit/e45241a4fe9ec896b92802461f6b05da3e53ec9a.diff

LOG: [AMDGPU] Hoist s_set_vgpr_msb past SALU program state instructions (#172108)

Hoisting past the program state instructions is legal and allows for
better coissue.

Added: 
    llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index d7d0292083e1c..c5293651fb833 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -137,6 +137,12 @@ class AMDGPULowerVGPREncoding {
   /// instruction to extend it or drop the clause if it cannot be adjusted.
   MachineBasicBlock::instr_iterator
   handleClause(MachineBasicBlock::instr_iterator I);
+
+  /// Choose where to insert S_SET_VGPR_MSB when it is needed before \p I. If
+  /// \p I directly follows program state instructions, return an earlier
+  /// insertion point ahead of them to aid coissue; otherwise return \p I.
+  MachineBasicBlock::instr_iterator
+  handleCoissue(MachineBasicBlock::instr_iterator I);
 };
 
 bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
@@ -167,6 +173,7 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
   int64_t OldModeBits = CurrentMode << ModeWidth;
 
   I = handleClause(I);
+  I = handleCoissue(I);
   MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
                           .addImm(NewMode | OldModeBits);
 
@@ -283,6 +290,31 @@ AMDGPULowerVGPREncoding::handleClause(MachineBasicBlock::instr_iterator I) {
   return I;
 }
 
+/// Return the point at which to insert S_SET_VGPR_MSB: the start of the
+/// contiguous run of barrier / waitcnt / program state instructions that
+/// immediately precedes \p I, or \p I itself if there is no such run.
+MachineBasicBlock::instr_iterator
+AMDGPULowerVGPREncoding::handleCoissue(MachineBasicBlock::instr_iterator I) {
+  if (I.isEnd())
+    return I;
+
+  // An earlier S_SET_VGPR_MSB is never crossed.
+  auto CanHoistOver = [this](const MachineInstr &MI) {
+    const unsigned Opc = MI.getOpcode();
+    return TII->isBarrier(Opc) || TII->isWaitcnt(Opc) ||
+           (SIInstrInfo::isProgramStateSALU(MI) &&
+            Opc != AMDGPU::S_SET_VGPR_MSB);
+  };
+
+  // Only ever step onto instructions that satisfy the predicate; anything
+  // else may still rely on the VGPR MSB mode in effect before the insertion.
+  const auto Begin = I->getParent()->instr_begin();
+  MachineBasicBlock::instr_iterator InsertPt = I;
+  while (InsertPt != Begin && CanHoistOver(*std::prev(InsertPt)))
+    --InsertPt;
+  return InsertPt;
+}
+
 bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   if (!ST.has1024AddressableVGPRs())

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index b1d6563bf3c0b..01c09cb3bd896 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -457,6 +457,12 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
     return get(Opcode).TSFlags & SIInstrFlags::SALU;
   }
 
+  /// \returns true if \p MI is one of S_DELAY_ALU, S_SET_VGPR_MSB or
+  /// ATOMIC_FENCE, the opcodes this helper classifies as program state SALU.
+  static bool isProgramStateSALU(const MachineInstr &MI) {
+    switch (MI.getOpcode()) {
+    case AMDGPU::S_DELAY_ALU:
+    case AMDGPU::S_SET_VGPR_MSB:
+    case AMDGPU::ATOMIC_FENCE:
+      return true;
+    default:
+      return false;
+    }
+  }
+
   static bool isVALU(const MachineInstr &MI) {
     return MI.getDesc().TSFlags & SIInstrFlags::VALU;
   }

diff  --git a/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir b/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
new file mode 100644
index 0000000000000..066d3e1d4c097
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-set-msb-coissue.mir
@@ -0,0 +1,64 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s
+
+---
+name:            multi
+tracksRegLiveness: true
+body:             |
+  bb.0:
+  liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-LABEL: name: multi
+    ; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+    ; CHECK-NEXT: S_WAIT_DSCNT 0
+    ; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
+    ; CHECK-NEXT: S_BARRIER_WAIT -1
+    ; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+  $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+  S_WAIT_DSCNT 0
+  S_BARRIER_SIGNAL_IMM -1
+  S_BARRIER_WAIT -1
+  $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...
+
+---
+name:            high_vgprs
+tracksRegLiveness: true
+body:             |
+  bb.0:
+  liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-LABEL: name: high_vgprs
+    ; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+    ; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
+    ; CHECK-NEXT: S_BARRIER_WAIT -1
+    ; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+  S_BARRIER_SIGNAL_IMM -1
+  S_BARRIER_WAIT -1
+  $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...
+
+---
+name:            no_control
+tracksRegLiveness: true
+body:             |
+  bb.0:
+  liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-LABEL: name: no_control
+    ; CHECK: liveins: $vgpr10, $vgpr11, $vgpr900, $vgpr901
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+    ; CHECK-NEXT: $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+  $vgpr11 = nofpexcept V_EXP_F32_e32 killed $vgpr10, implicit $mode, implicit $exec
+  $vgpr256 = nofpexcept V_EXP_F32_e32 killed $vgpr257, implicit $mode, implicit $exec
+  S_ENDPGM 0
+...


        


More information about the llvm-commits mailing list