[llvm] [AMDGPU] Fix GFX1250 hazard: S_SET_VGPR_MSB dropped (PR #184904)

Sat Mar 7 09:40:28 PST 2026

https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/184904

>From 2230812dc4afe1ef4cb0d05eae1b62942400b527 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 6 Mar 2026 10:55:36 +0000
Subject: [PATCH 1/2] [AMDGPU] Run AMDGPULowerVGPREncoding before hazard
 recognizers. NFCI.

This gives us the opportunity to fix hazards introduced by the VGPR
lowering pass.
---
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  8 +++++---
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll   |  6 +++---
 llvm/test/CodeGen/AMDGPU/llc-pipeline.ll       | 10 +++++-----
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5b3effbcc7179..ad5e158b6620c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1859,6 +1859,9 @@ void GCNPassConfig::addPreEmitPass() {
     addPass(createAMDGPUSetWavePriorityPass());
   if (getOptLevel() > CodeGenOptLevel::None)
     addPass(&SIPreEmitPeepholeID);
+
+  addPass(&AMDGPULowerVGPREncodingLegacyID);
+
   // The hazard recognizer that runs as part of the post-ra scheduler does not
   // guarantee to be able handle all hazards correctly. This is because if there
   // are multiple scheduling regions in a basic block, the regions are scheduled
@@ -1871,8 +1874,6 @@ void GCNPassConfig::addPreEmitPass() {
 
   addPass(&AMDGPUWaitSGPRHazardsLegacyID);
 
-  addPass(&AMDGPULowerVGPREncodingLegacyID);
-
   if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
     addPass(&AMDGPUInsertDelayAluID);
 
@@ -2575,6 +2576,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(PassManagerWrapper &PMW) const {
   if (TM.getOptLevel() > CodeGenOptLevel::None)
     addMachineFunctionPass(SIPreEmitPeepholePass(), PMW);
 
+  addMachineFunctionPass(AMDGPULowerVGPREncodingPass(), PMW);
+
   // The hazard recognizer that runs as part of the post-ra scheduler does not
   // guarantee to be able handle all hazards correctly. This is because if there
   // are multiple scheduling regions in a basic block, the regions are scheduled
@@ -2585,7 +2588,6 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(PassManagerWrapper &PMW) const {
   // cases.
   addMachineFunctionPass(PostRAHazardRecognizerPass(), PMW);
   addMachineFunctionPass(AMDGPUWaitSGPRHazardsPass(), PMW);
-  addMachineFunctionPass(AMDGPULowerVGPREncodingPass(), PMW);
 
   if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less)) {
     addMachineFunctionPass(AMDGPUInsertDelayAluPass(), PMW);
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 094ffe2b79715..af1e74539d8ac 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -81,9 +81,9 @@
 ; GCN-O0-NEXT: si-insert-waitcnts
 ; GCN-O0-NEXT: si-mode-register
 ; GCN-O0-NEXT: si-late-branch-lowering
+; GCN-O0-NEXT: amdgpu-lower-vgpr-encoding
 ; GCN-O0-NEXT: post-RA-hazard-rec
 ; GCN-O0-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O0-NEXT: amdgpu-lower-vgpr-encoding
 ; GCN-O0-NEXT: branch-relaxation
 ; GCN-O0-NEXT: reg-usage-collector
 ; GCN-O0-NEXT: remove-loads-into-fake-uses
@@ -251,9 +251,9 @@
 ; GCN-O2-NEXT: si-insert-hard-clauses
 ; GCN-O2-NEXT: si-late-branch-lowering
 ; GCN-O2-NEXT: si-pre-emit-peephole
+; GCN-O2-NEXT: amdgpu-lower-vgpr-encoding
 ; GCN-O2-NEXT: post-RA-hazard-rec
 ; GCN-O2-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O2-NEXT: amdgpu-lower-vgpr-encoding
 ; GCN-O2-NEXT: amdgpu-insert-delay-alu
 ; GCN-O2-NEXT: branch-relaxation
 ; GCN-O2-NEXT: reg-usage-collector
@@ -422,9 +422,9 @@
 ; GCN-O3-NEXT: si-insert-hard-clauses
 ; GCN-O3-NEXT: si-late-branch-lowering
 ; GCN-O3-NEXT: si-pre-emit-peephole
+; GCN-O3-NEXT: amdgpu-lower-vgpr-encoding
 ; GCN-O3-NEXT: post-RA-hazard-rec
 ; GCN-O3-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O3-NEXT: amdgpu-lower-vgpr-encoding
 ; GCN-O3-NEXT: amdgpu-insert-delay-alu
 ; GCN-O3-NEXT: branch-relaxation
 ; GCN-O3-NEXT: reg-usage-collector
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index abb6ccc5faadb..f940cbcb6401d 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -145,9 +145,9 @@
 ; GCN-O0-NEXT:        SI insert wait instructions
 ; GCN-O0-NEXT:        Insert required mode register values
 ; GCN-O0-NEXT:        SI Final Branch Preparation
+; GCN-O0-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O0-NEXT:        Post RA hazard recognizer
 ; GCN-O0-NEXT:        AMDGPU Insert waits for SGPR read hazards
-; GCN-O0-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O0-NEXT:        Branch relaxation pass
 ; GCN-O0-NEXT:        Register Usage Information Collector Pass
 ; GCN-O0-NEXT:        Remove Loads Into Fake Uses
@@ -436,9 +436,9 @@
 ; GCN-O1-NEXT:        SI Insert Hard Clauses
 ; GCN-O1-NEXT:        SI Final Branch Preparation
 ; GCN-O1-NEXT:        SI peephole optimizations
+; GCN-O1-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O1-NEXT:        Post RA hazard recognizer
 ; GCN-O1-NEXT:        AMDGPU Insert waits for SGPR read hazards
-; GCN-O1-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O1-NEXT:        AMDGPU Insert Delay ALU
 ; GCN-O1-NEXT:        Branch relaxation pass
 ; GCN-O1-NEXT:        Register Usage Information Collector Pass
@@ -755,9 +755,9 @@
 ; GCN-O1-OPTS-NEXT:        SI Insert Hard Clauses
 ; GCN-O1-OPTS-NEXT:        SI Final Branch Preparation
 ; GCN-O1-OPTS-NEXT:        SI peephole optimizations
+; GCN-O1-OPTS-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O1-OPTS-NEXT:        Post RA hazard recognizer
 ; GCN-O1-OPTS-NEXT:        AMDGPU Insert waits for SGPR read hazards
-; GCN-O1-OPTS-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O1-OPTS-NEXT:        AMDGPU Insert Delay ALU
 ; GCN-O1-OPTS-NEXT:        Branch relaxation pass
 ; GCN-O1-OPTS-NEXT:        Register Usage Information Collector Pass
@@ -1079,9 +1079,9 @@
 ; GCN-O2-NEXT:        SI Insert Hard Clauses
 ; GCN-O2-NEXT:        SI Final Branch Preparation
 ; GCN-O2-NEXT:        SI peephole optimizations
+; GCN-O2-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O2-NEXT:        Post RA hazard recognizer
 ; GCN-O2-NEXT:        AMDGPU Insert waits for SGPR read hazards
-; GCN-O2-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O2-NEXT:        AMDGPU Insert Delay ALU
 ; GCN-O2-NEXT:        Branch relaxation pass
 ; GCN-O2-NEXT:        Register Usage Information Collector Pass
@@ -1416,9 +1416,9 @@
 ; GCN-O3-NEXT:        SI Insert Hard Clauses
 ; GCN-O3-NEXT:        SI Final Branch Preparation
 ; GCN-O3-NEXT:        SI peephole optimizations
+; GCN-O3-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O3-NEXT:        Post RA hazard recognizer
 ; GCN-O3-NEXT:        AMDGPU Insert waits for SGPR read hazards
-; GCN-O3-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O3-NEXT:        AMDGPU Insert Delay ALU
 ; GCN-O3-NEXT:        Branch relaxation pass
 ; GCN-O3-NEXT:        Register Usage Information Collector Pass

>From 6a0a8d7ec09fae2819427e17d7d4a8a24d6afad1 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Wed, 4 Mar 2026 19:42:17 -0500
Subject: [PATCH 2/2] [AMDGPU] Fix GFX1250 hazard: S_SET_VGPR_MSB dropped after
 S_SETREG_IMM32_B32 (MODE)

On GFX1250, S_SET_VGPR_MSB is silently dropped when immediately following
S_SETREG_IMM32_B32 targeting the MODE register.

For Case 2 (size > 12), where imm32[12:19] is part of the MODE value and
cannot be freely modified, GCNHazardRecognizer predicts whether
AMDGPULowerVGPREncoding will place S_SET_VGPR_MSB after the setreg and
inserts S_NOPs to prevent the hazard. AMDGPULowerVGPREncoding then skips
over these S_NOPs when placing S_SET_VGPR_MSB.

The prediction (willSetregNeedVGPRMSB) mirrors handleSetregMode's logic:
- If imm[12:19] matches the next VALU's MSB, no S_SET_VGPR_MSB is needed
- If no preceding high VGPRs are in use, handleSetregMode won't insert
- Otherwise, S_NOPs are inserted to separate the instructions

Shared MSB computation utilities (OpMode, ModeTy, computeMode, etc.) are
extracted into AMDGPUVGPREncoding.h for use by both passes.

The number of S_NOPs is configurable via -amdgpu-setreg-vgpr-msb-nops
(default: 1). A debug assertion verifies no back-to-back setreg +
S_SET_VGPR_MSB remains after lowering.
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp |  24 ++++
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h  |   1 +
 .../AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir | 120 ++++++++++++++++++
 .../CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir  |   4 +-
 4 files changed, 148 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 30a9d1d2ab149..2b81d0689f9b6 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1308,6 +1308,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
     fixScratchBaseForwardingHazard(MI);
   if (ST.setRegModeNeedsVNOPs())
     fixSetRegMode(MI);
+  if (ST.has1024AddressableVGPRs())
+    fixSetRegModeToVGPRMSBHazard(MI);
 }
 
 static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3865,3 +3867,25 @@ bool GCNHazardRecognizer::fixSetRegMode(MachineInstr *MI) {
   BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
   return true;
 }
+
+bool GCNHazardRecognizer::fixSetRegModeToVGPRMSBHazard(MachineInstr *MI) { // Separates S_SETREG_IMM32_B32 (MODE) from a directly following S_SET_VGPR_MSB with an S_NOP; returns true iff a nop was inserted.
+  if (MI->getOpcode() != AMDGPU::S_SETREG_IMM32_B32) // Only the immediate form of setreg is examined.
+    return false;
+
+  auto [Id, Offset, Width] = // Operand 1 is decoded as the hwreg descriptor (register id, bit offset, field width).
+      AMDGPU::Hwreg::HwregEncoding::decode(MI->getOperand(1).getImm());
+  (void)Offset; // Only Id is consulted below; Offset and Width are deliberately ignored.
+  (void)Width;
+  if (Id != AMDGPU::Hwreg::ID_MODE) // Writes to any other hardware register are left alone.
+    return false;
+
+  MachineBasicBlock *MBB = MI->getParent();
+  auto Next = std::next(MI->getIterator()); // Start scanning at the instruction right after the setreg.
+  while (Next != MBB->instr_end() && Next->isMetaInstruction()) // Step over meta instructions to reach the next non-meta instruction.
+    ++Next;
+  if (Next == MBB->instr_end() || Next->getOpcode() != AMDGPU::S_SET_VGPR_MSB) // The scan stops at the end of this block; successor blocks are not inspected.
+    return false;
+
+  BuildMI(*MBB, Next, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP)).addImm(0); // A single "S_NOP 0" is placed immediately before the S_SET_VGPR_MSB, reusing the setreg's debug location.
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index b331504d40113..52b54e8581f6b 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -137,6 +137,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
   bool fixScratchBaseForwardingHazard(MachineInstr *MI);
   bool fixSetRegMode(MachineInstr *MI);
+  bool fixSetRegModeToVGPRMSBHazard(MachineInstr *MI);
 
   int checkMAIHazards(MachineInstr *MI);
   int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir
new file mode 100644
index 0000000000000..a80d67ac8212b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding,post-RA-hazard-rec -o - %s | FileCheck %s
+
+# Test handling of the GFX1250 hardware hazard where S_SET_VGPR_MSB immediately
+# after S_SETREG_IMM32_B32 (MODE) is silently dropped.
+#
+# AMDGPULowerVGPREncoding may place S_SET_VGPR_MSB after S_SETREG_IMM32_B32
+# (MODE) in Case 2 (size > 12). GCNHazardRecognizer then detects this sequence
+# and inserts S_NOPs between them to prevent the hazard.
+
+---
+# Case 2 mismatch: setreg (size=16) with imm32[12:19] that doesn't match
+# current VGPR MSB. AMDGPULowerVGPREncoding inserts S_SET_VGPR_MSB after the
+# setreg; GCNHazardRecognizer inserts S_NOP between them.
+name: setreg_mode_size_gt_12_mismatch
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: setreg_mode_size_gt_12_mismatch
+    ; CHECK: S_SET_VGPR_MSB 64, implicit-def $mode
+    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_NOP 0
+    ; CHECK-NEXT: S_SET_VGPR_MSB 64, implicit-def $mode
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+    ; imm32 = 0x23ABC = 146108 (bits 12:19 = 0x23, doesn't match VGPR MSB mode)
+    S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    S_ENDPGM 0
+...
+
+---
+# Case 2 with different next MSB: setreg (size=16) with imm32[12:19] that
+# doesn't match current VGPR MSB. S_SET_VGPR_MSB is inserted to restore
+# current mode, then another S_SET_VGPR_MSB for the next VALU (v512/v513).
+# GCNHazardRecognizer inserts S_NOP between setreg and S_SET_VGPR_MSB.
+name: setreg_mode_size_gt_12_matches_next
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: setreg_mode_size_gt_12_matches_next
+    ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
+    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $vgpr257, implicit $exec
+    ; CHECK-NEXT: S_SETREG_IMM32_B32 43708, 30721, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_NOP 0
+    ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+    ; CHECK-NEXT: S_SET_VGPR_MSB 16770, implicit-def $mode
+    ; CHECK-NEXT: $vgpr512 = V_MOV_B32_e32 undef $vgpr513, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr256 = V_MOV_B32_e32 undef $vgpr257, implicit $exec
+    ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+    ; imm32 = 0xAABC = 43708 (bits 12:19 = 0xA = 10, matches next MSB for v512/v513)
+    S_SETREG_IMM32_B32 43708, 30721, implicit-def $mode, implicit $mode
+    $vgpr512 = V_MOV_B32_e32 undef $vgpr513, implicit $exec
+    S_ENDPGM 0
+...
+
+---
+# No hazard: S_SETREG_IMM32_B32 targeting non-MODE register.
+name: setreg_non_mode_no_hazard
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: setreg_non_mode_no_hazard
+    ; CHECK: S_SET_VGPR_MSB 64, implicit-def $mode
+    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; CHECK-NEXT: S_SETREG_IMM32_B32 0, 2178, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_SET_VGPR_MSB 16384, implicit-def $mode
+    ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 undef $vgpr1, undef $vgpr2, implicit $exec, implicit $mode
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; hwreg(STATUS, 2, 2): simm16 = 2 | (2 << 6) | (1 << 11) = 0x882 = 2178
+    S_SETREG_IMM32_B32 0, 2178, implicit-def $mode, implicit $mode
+    $vgpr0 = V_ADD_F32_e32 undef $vgpr1, undef $vgpr2, implicit $exec, implicit $mode
+    S_ENDPGM 0
+...
+
+---
+# Case 2 but no high VGPRs before setreg. The lowering pass still inserts
+# S_SET_VGPR_MSB 0 (redundant) and the hazard recognizer inserts S_NOP.
+name: setreg_mode_size_gt_12_no_high_vgpr
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: setreg_mode_size_gt_12_no_high_vgpr
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_NOP 0
+    ; CHECK-NEXT: S_SET_VGPR_MSB 0, implicit-def $mode
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+    S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    S_ENDPGM 0
+...
+
+---
+# Case 2 with high VGPR only AFTER setreg: setreg (size=16) with low VGPRs
+# before but high VGPRs after. AMDGPULowerVGPREncoding's setMode inserts
+# S_SET_VGPR_MSB before the next VALU (which is right after setreg).
+# GCNHazardRecognizer detects the setreg + S_SET_VGPR_MSB and inserts S_NOP.
+name: setreg_mode_size_gt_12_high_vgpr_after
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: setreg_mode_size_gt_12_high_vgpr_after
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_NOP 0
+    ; CHECK-NEXT: S_SET_VGPR_MSB 0, implicit-def $mode
+    ; CHECK-NEXT: S_SET_VGPR_MSB 64, implicit-def $mode
+    ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0
+    $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+    S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir b/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir
index ecfc3cdcd215c..6559204f8feeb 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding,post-RA-hazard-rec -o - %s | FileCheck %s
 
 ---
 # Case 1a: Size < 12 (size=4), imm32[12:19]=0
@@ -94,6 +94,7 @@ body:             |
     ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
     ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
     ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_NOP 0
     ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
     ; CHECK-NEXT: S_ENDPGM 0
     $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
@@ -231,6 +232,7 @@ body:             |
     ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
     ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
     ; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 30721, implicit-def $mode, implicit $mode
+    ; CHECK-NEXT: S_NOP 0
     ; CHECK-NEXT: S_SET_VGPR_MSB 16770, implicit-def $mode
     ; CHECK-NEXT: $vgpr512 = V_MOV_B32_e32 $vgpr513, implicit $exec
     ; CHECK-NEXT: S_ENDPGM 0