[llvm] 1813603 - [AMDGPU] Fix GFX1250 hazard: S_SET_VGPR_MSB dropped (#184904)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 10 06:35:52 PDT 2026


Author: Yaxun (Sam) Liu
Date: 2026-03-10T09:35:45-04:00
New Revision: 1813603933d19111e5310c5302b76be4de06dcab

URL: https://github.com/llvm/llvm-project/commit/1813603933d19111e5310c5302b76be4de06dcab
DIFF: https://github.com/llvm/llvm-project/commit/1813603933d19111e5310c5302b76be4de06dcab.diff

LOG: [AMDGPU] Fix GFX1250 hazard: S_SET_VGPR_MSB dropped (#184904)

[AMDGPU] Fix GFX1250 hazard: S_SET_VGPR_MSB dropped after
S_SETREG_IMM32_B32 (MODE)

On GFX1250, S_SET_VGPR_MSB immediately after S_SETREG_IMM32_B32
targeting
the MODE register is silently dropped by hardware.

AMDGPULowerVGPREncoding may insert S_SET_VGPR_MSB after a setreg(MODE)
in
Case 2 (size > 12) when imm32[12:19] doesn't match current VGPR MSBs, or
when the next VALU instruction needs different MSBs. Fix by inserting
S_NOP
between the setreg and S_SET_VGPR_MSB to prevent the hazard.

The fix handles two scenarios:
- Case 2 mismatch: S_NOP is inserted directly before S_SET_VGPR_MSB in
  handleSetregMode.
- Case 2 match followed by a VALU with different MSBs: a flag
  (NeedNopBeforeSetVGPRMSB) is set, and setMode inserts S_NOP before the
  next S_SET_VGPR_MSB.

Also adds vcmpx-permlane-vgpr-msb-gfx1250.mir to verify that VGPR
lowering
must run after the hazard recognizer: fixVcmpxPermlaneHazards creates
V_MOV_B32 using high VGPRs that need correct S_SET_VGPR_MSB from the
lowering pass.

Added: 
    llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir
    llvm/test/CodeGen/AMDGPU/vcmpx-permlane-vgpr-msb-gfx1250.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
    llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index cbbc90a0f25b0..f18cc5f59ac07 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -155,6 +155,11 @@ class AMDGPULowerVGPREncoding {
   /// Last hard clause instruction.
   MachineInstr *Clause;
 
+  /// S_SET_VGPR_MSB immediately after S_SETREG_IMM32_B32 targeting MODE is
+  /// silently dropped on GFX1250. When set, the next S_SET_VGPR_MSB insertion
+  /// must be preceded by S_NOP to avoid the hazard.
+  bool NeedNopBeforeSetVGPRMSB;
+
   /// Insert mode change before \p I. \returns true if mode was changed.
   bool setMode(ModeTy NewMode, MachineBasicBlock::instr_iterator I);
 
@@ -251,6 +256,14 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode,
 
   I = handleClause(I);
   I = handleCoissue(I);
+  // Case 2 match in handleSetregMode: the setreg's imm[12:19] matched
+  // current MSBs, but the next VALU needs different MSBs, so this
+  // S_SET_VGPR_MSB would land right after the setreg. Insert S_NOP to
+  // prevent it from being silently dropped.
+  if (NeedNopBeforeSetVGPRMSB) {
+    BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_NOP)).addImm(0);
+    NeedNopBeforeSetVGPRMSB = false;
+  }
   MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
                           .addImm(NewMode.encode() | OldModeBits);
   LLVM_DEBUG(dbgs() << "    -> inserted new S_SET_VGPR_MSB: "
@@ -508,14 +521,18 @@ bool AMDGPULowerVGPREncoding::handleSetregMode(MachineInstr &MI) {
     // via piggybacking (bits[12:19] are meaningful), so if CurrentMode changes,
     // a new s_set_vgpr_msb will be inserted after this instruction.
     MostRecentModeSet = nullptr;
+    NeedNopBeforeSetVGPRMSB = true;
     LLVM_DEBUG(dbgs() << "    -> bits[12:19] already correct, "
                          "invalidated MostRecentModeSet\n");
     return false;
   }
 
   // imm32[12:19] doesn't match VGPR MSBs - insert s_set_vgpr_msb after
-  // the original instruction to restore the correct value.
+  // the original instruction to restore the correct value. Insert S_NOP
+  // to avoid the GFX1250 hazard where S_SET_VGPR_MSB immediately after
+  // S_SETREG_IMM32_B32(MODE) is silently dropped.
   MachineBasicBlock::iterator InsertPt = std::next(MI.getIterator());
+  BuildMI(*MBB, InsertPt, MI.getDebugLoc(), TII->get(AMDGPU::S_NOP)).addImm(0);
   MostRecentModeSet = BuildMI(*MBB, InsertPt, MI.getDebugLoc(),
                               TII->get(AMDGPU::S_SET_VGPR_MSB))
                           .addImm(ModeValue);
@@ -540,6 +557,7 @@ bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
   CurrentMode = {};
   for (auto &MBB : MF) {
     MostRecentModeSet = nullptr;
+    NeedNopBeforeSetVGPRMSB = false;
     this->MBB = &MBB;
 
     LLVM_DEBUG(dbgs() << "BB#" << MBB.getNumber() << ' ' << MBB.getName()
@@ -556,6 +574,7 @@ bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
           CurrentMode = {};
         else
           resetMode(MI.getIterator());
+        NeedNopBeforeSetVGPRMSB = false;
         continue;
       }
 
@@ -563,6 +582,7 @@ bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
         LLVM_DEBUG(dbgs() << "  inline asm: " << MI);
         if (TII->hasVGPRUses(MI))
           resetMode(MI.getIterator());
+        NeedNopBeforeSetVGPRMSB = false;
         continue;
       }
 
@@ -584,6 +604,7 @@ bool AMDGPULowerVGPREncoding::run(MachineFunction &MF) {
       }
 
       Changed |= runOnMachineInstr(MI);
+      NeedNopBeforeSetVGPRMSB = false;
 
       if (ClauseRemaining)
         --ClauseRemaining;

diff  --git a/llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir
new file mode 100644
index 0000000000000..95c8c67566f51
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir
@@ -0,0 +1,119 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s
+
+# Test handling of the GFX1250 hardware hazard where S_SET_VGPR_MSB immediately
+# after S_SETREG_IMM32_B32 (MODE) is silently dropped.
+#
+# AMDGPULowerVGPREncoding may place S_SET_VGPR_MSB after S_SETREG_IMM32_B32
+# (MODE) in Case 2 (size > 12). It inserts S_NOPs between them to prevent
+# the hazard.
+
+---
+# Case 2 mismatch: setreg (size=16) with imm32[12:19] that doesn't match
+# current VGPR MSB. AMDGPULowerVGPREncoding inserts S_NOP + S_SET_VGPR_MSB
+# after the setreg.
+name: setreg_mode_size_gt_12_mismatch
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: setreg_mode_size_gt_12_mismatch
+    ; CHECK: S_SET_VGPR_MSB 64, implicit-def $mode
+    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_NOP 0
+    ; CHECK-NEXT: S_SET_VGPR_MSB 64, implicit-def $mode
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+    ; imm32 = 0x23ABC = 146108 (bits 12:19 = 0x23, doesn't match VGPR MSB mode)
+    S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    S_ENDPGM 0
+...
+
+---
+# Case 2 with different next MSB: setreg (size=16) with imm32[12:19] that
+# doesn't match current VGPR MSB. S_NOP + S_SET_VGPR_MSB is inserted to
+# restore current mode, then another S_SET_VGPR_MSB for the next VALU
+# (v512/v513).
+name: setreg_mode_size_gt_12_matches_next
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: setreg_mode_size_gt_12_matches_next
+    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
+    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $vgpr257, implicit $exec
+    ; CHECK-NEXT: S_SETREG_IMM32_B32 43708, 30721, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_NOP 0
+    ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+    ; CHECK-NEXT: S_SET_VGPR_MSB 16770, implicit-def $mode
+    ; CHECK-NEXT: $vgpr512 = V_MOV_B32_e32 undef $vgpr513, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr256 = V_MOV_B32_e32 undef $vgpr257, implicit $exec
+    ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+    ; imm32 = 0xAABC = 43708 (bits 12:19 = 0xA = 10, matches next MSB for v512/v513)
+    S_SETREG_IMM32_B32 43708, 30721, implicit-def $mode, implicit $mode
+    $vgpr512 = V_MOV_B32_e32 undef $vgpr513, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# No hazard: S_SETREG_IMM32_B32 targeting non-MODE register.
+name: setreg_non_mode_no_hazard
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: setreg_non_mode_no_hazard
+    ; CHECK: S_SET_VGPR_MSB 64, implicit-def $mode
+    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; CHECK-NEXT: S_SETREG_IMM32_B32 0, 2178, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_SET_VGPR_MSB 16384, implicit-def $mode
+    ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 undef $vgpr1, undef $vgpr2, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; hwreg(STATUS, 2, 2): simm16 = 2 | (2 << 6) | (1 << 11) = 0x882 = 2178
+    S_SETREG_IMM32_B32 0, 2178, implicit-def $mode, implicit $mode
+    $vgpr0 = V_ADD_F32_e32 undef $vgpr1, undef $vgpr2, implicit $exec, implicit $mode
+    S_ENDPGM 0
+...
+
+---
+# Case 2 but no high VGPRs before setreg. The lowering pass still inserts
+# S_NOP + S_SET_VGPR_MSB 0 (redundant but safe).
+name: setreg_mode_size_gt_12_no_high_vgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: setreg_mode_size_gt_12_no_high_vgpr
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_NOP 0
+    ; CHECK-NEXT: S_SET_VGPR_MSB 0, implicit-def $mode
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+    S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    S_ENDPGM 0
+...
+
+---
+# Case 2 with high VGPR only AFTER setreg: setreg (size=16) with low VGPRs
+# before but high VGPRs after. AMDGPULowerVGPREncoding inserts S_NOP +
+# S_SET_VGPR_MSB before the next VALU (which is right after setreg).
+name: setreg_mode_size_gt_12_high_vgpr_after
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: setreg_mode_size_gt_12_high_vgpr_after
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_NOP 0
+    ; CHECK-NEXT: S_SET_VGPR_MSB 0, implicit-def $mode
+    ; CHECK-NEXT: S_SET_VGPR_MSB 64, implicit-def $mode
+    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+    S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    S_ENDPGM 0
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/vcmpx-permlane-vgpr-msb-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vcmpx-permlane-vgpr-msb-gfx1250.mir
new file mode 100644
index 0000000000000..8be4c6449d596
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/vcmpx-permlane-vgpr-msb-gfx1250.mir
@@ -0,0 +1,69 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# Test: fixVcmpxPermlaneHazards + S_SET_VGPR_MSB interaction on GFX1250
+#
+# Proves that VGPR lowering MUST run AFTER the hazard recognizer.
+#
+# fixVcmpxPermlaneHazards creates V_MOV_B32 Reg, Reg using permlane's src0.
+# When src0 is a high VGPR (>= v256) but vdst is a low VGPR, the V_MOV_B32
+# needs {dst=1, src0=1} MSB mode, while the permlane needs {dst=0, src0=1}.
+#
+# Current order (hazard-rec then vgpr-lowering): VGPR lowering sees the
+# V_MOV_B32 and inserts S_SET_VGPR_MSB with correct {dst=1, src0=1}.
+#
+# Reversed order (vgpr-lowering then hazard-rec): The V_MOV_B32 inherits the
+# permlane's {dst=0, src0=1}, causing it to write to v44 instead of v300.
+
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=post-RA-hazard-rec,amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -start-before=post-RA-hazard-rec -o - %s | FileCheck -check-prefix=ASM %s
+
+# vcmpx + permlane with high VGPR src0 ($vgpr300) and low VGPR vdst ($vgpr0).
+# fixVcmpxPermlaneHazards inserts V_MOV_B32 $vgpr300, $vgpr300 before permlane.
+#
+# With correct order (hazard-rec then vgpr-lowering), VGPR lowering sees the
+# V_MOV_B32 and inserts S_SET_VGPR_MSB 65 {dst=1, src0=1} before it,
+# then S_SET_VGPR_MSB {dst=0, src0=1} before the permlane.
+#
+# If the order were reversed (vgpr-lowering then hazard-rec), VGPR lowering
+# would only see the permlane and insert S_SET_VGPR_MSB 1 {dst=0, src0=1}.
+# The V_MOV_B32 inserted later by the hazard recognizer would inherit dst=0,
+# writing to v44 (300 & 0xFF) instead of v300.
+#
+# ASM-LABEL: vcmpx_permlane_high_vgpr_src0:
+# ASM:      v_cmpx_le_f32_e32 0, v0
+# ASM:      s_set_vgpr_msb 0x41
+# ASM-NEXT: v_mov_b32_e32 v44 /*v300*/, v44 /*v300*/
+# ASM-NEXT: s_set_vgpr_msb 0x4101
+# ASM:      v_permlane16_b32 v0, v44 /*v300*/, s1, s0
+---
+name:            vcmpx_permlane_high_vgpr_src0
+tracksRegLiveness: true
+body:            |
+  ; CHECK-LABEL: name: vcmpx_permlane_high_vgpr_src0
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; CHECK-NEXT:   V_CMPX_LE_F32_nosdst_e32 0, $vgpr0, implicit-def $exec, implicit $mode, implicit $exec
+  ; CHECK-NEXT:   S_BRANCH %bb.1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr300, $sgpr0, $sgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SET_VGPR_MSB 65, implicit-def $mode
+  ; CHECK-NEXT:   $vgpr300 = V_MOV_B32_e32 killed $vgpr300, implicit $exec
+  ; CHECK-NEXT:   S_SET_VGPR_MSB 16641, implicit-def $mode
+  ; CHECK-NEXT:   $vgpr0 = V_PERMLANE16_B32_e64 0, killed $vgpr300, 0, killed $sgpr1, 0, killed $sgpr0, $vgpr0, 0, implicit $exec
+  ; CHECK-NEXT:   S_ENDPGM 0
+  bb.0:
+    successors: %bb.1
+    liveins: $vgpr0
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    V_CMPX_LE_F32_nosdst_e32 0, $vgpr0, implicit-def $exec, implicit $mode, implicit $exec
+    S_BRANCH %bb.1
+
+  bb.1:
+    liveins: $vgpr0, $vgpr300, $sgpr0, $sgpr1
+    $vgpr0 = V_PERMLANE16_B32_e64 0, killed $vgpr300, 0, killed $sgpr1, 0, killed $sgpr0, $vgpr0, 0, implicit $exec
+    S_ENDPGM 0
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir b/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir
index ecfc3cdcd215c..422baae280f9c 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir
@@ -94,6 +94,7 @@ body:             |
     ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
     ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
     ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_NOP 0
     ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
     ; CHECK-NEXT: S_ENDPGM 0
     $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
@@ -231,6 +232,7 @@ body:             |
     ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
     ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
     ; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 30721, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_NOP 0
     ; CHECK-NEXT: S_SET_VGPR_MSB 16770, implicit-def $mode
     ; CHECK-NEXT: $vgpr512 = V_MOV_B32_e32 $vgpr513, implicit $exec
     ; CHECK-NEXT: S_ENDPGM 0


        


More information about the llvm-commits mailing list