[llvm] [AMDGPU] Fix GFX1250 hazard: S_SET_VGPR_MSB dropped (PR #184904)
Yaxun Liu via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 7 09:40:28 PST 2026
https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/184904
>From 2230812dc4afe1ef4cb0d05eae1b62942400b527 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 6 Mar 2026 10:55:36 +0000
Subject: [PATCH 1/2] [AMDGPU] Run AMDGPULowerVGPREncoding before hazard
recognizers. NFCI.
This gives us the opportunity to fix hazards introduced by the VGPR
lowering pass.
---
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 8 +++++---
llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 6 +++---
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 10 +++++-----
3 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5b3effbcc7179..ad5e158b6620c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1859,6 +1859,9 @@ void GCNPassConfig::addPreEmitPass() {
addPass(createAMDGPUSetWavePriorityPass());
if (getOptLevel() > CodeGenOptLevel::None)
addPass(&SIPreEmitPeepholeID);
+
+ addPass(&AMDGPULowerVGPREncodingLegacyID);
+
// The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able handle all hazards correctly. This is because if there
// are multiple scheduling regions in a basic block, the regions are scheduled
@@ -1871,8 +1874,6 @@ void GCNPassConfig::addPreEmitPass() {
addPass(&AMDGPUWaitSGPRHazardsLegacyID);
- addPass(&AMDGPULowerVGPREncodingLegacyID);
-
if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
addPass(&AMDGPUInsertDelayAluID);
@@ -2575,6 +2576,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(PassManagerWrapper &PMW) const {
if (TM.getOptLevel() > CodeGenOptLevel::None)
addMachineFunctionPass(SIPreEmitPeepholePass(), PMW);
+ addMachineFunctionPass(AMDGPULowerVGPREncodingPass(), PMW);
+
// The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able handle all hazards correctly. This is because if there
// are multiple scheduling regions in a basic block, the regions are scheduled
@@ -2585,7 +2588,6 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(PassManagerWrapper &PMW) const {
// cases.
addMachineFunctionPass(PostRAHazardRecognizerPass(), PMW);
addMachineFunctionPass(AMDGPUWaitSGPRHazardsPass(), PMW);
- addMachineFunctionPass(AMDGPULowerVGPREncodingPass(), PMW);
if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less)) {
addMachineFunctionPass(AMDGPUInsertDelayAluPass(), PMW);
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 094ffe2b79715..af1e74539d8ac 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -81,9 +81,9 @@
; GCN-O0-NEXT: si-insert-waitcnts
; GCN-O0-NEXT: si-mode-register
; GCN-O0-NEXT: si-late-branch-lowering
+; GCN-O0-NEXT: amdgpu-lower-vgpr-encoding
; GCN-O0-NEXT: post-RA-hazard-rec
; GCN-O0-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O0-NEXT: amdgpu-lower-vgpr-encoding
; GCN-O0-NEXT: branch-relaxation
; GCN-O0-NEXT: reg-usage-collector
; GCN-O0-NEXT: remove-loads-into-fake-uses
@@ -251,9 +251,9 @@
; GCN-O2-NEXT: si-insert-hard-clauses
; GCN-O2-NEXT: si-late-branch-lowering
; GCN-O2-NEXT: si-pre-emit-peephole
+; GCN-O2-NEXT: amdgpu-lower-vgpr-encoding
; GCN-O2-NEXT: post-RA-hazard-rec
; GCN-O2-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O2-NEXT: amdgpu-lower-vgpr-encoding
; GCN-O2-NEXT: amdgpu-insert-delay-alu
; GCN-O2-NEXT: branch-relaxation
; GCN-O2-NEXT: reg-usage-collector
@@ -422,9 +422,9 @@
; GCN-O3-NEXT: si-insert-hard-clauses
; GCN-O3-NEXT: si-late-branch-lowering
; GCN-O3-NEXT: si-pre-emit-peephole
+; GCN-O3-NEXT: amdgpu-lower-vgpr-encoding
; GCN-O3-NEXT: post-RA-hazard-rec
; GCN-O3-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O3-NEXT: amdgpu-lower-vgpr-encoding
; GCN-O3-NEXT: amdgpu-insert-delay-alu
; GCN-O3-NEXT: branch-relaxation
; GCN-O3-NEXT: reg-usage-collector
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index abb6ccc5faadb..f940cbcb6401d 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -145,9 +145,9 @@
; GCN-O0-NEXT: SI insert wait instructions
; GCN-O0-NEXT: Insert required mode register values
; GCN-O0-NEXT: SI Final Branch Preparation
+; GCN-O0-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O0-NEXT: Post RA hazard recognizer
; GCN-O0-NEXT: AMDGPU Insert waits for SGPR read hazards
-; GCN-O0-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O0-NEXT: Branch relaxation pass
; GCN-O0-NEXT: Register Usage Information Collector Pass
; GCN-O0-NEXT: Remove Loads Into Fake Uses
@@ -436,9 +436,9 @@
; GCN-O1-NEXT: SI Insert Hard Clauses
; GCN-O1-NEXT: SI Final Branch Preparation
; GCN-O1-NEXT: SI peephole optimizations
+; GCN-O1-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O1-NEXT: Post RA hazard recognizer
; GCN-O1-NEXT: AMDGPU Insert waits for SGPR read hazards
-; GCN-O1-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O1-NEXT: AMDGPU Insert Delay ALU
; GCN-O1-NEXT: Branch relaxation pass
; GCN-O1-NEXT: Register Usage Information Collector Pass
@@ -755,9 +755,9 @@
; GCN-O1-OPTS-NEXT: SI Insert Hard Clauses
; GCN-O1-OPTS-NEXT: SI Final Branch Preparation
; GCN-O1-OPTS-NEXT: SI peephole optimizations
+; GCN-O1-OPTS-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O1-OPTS-NEXT: Post RA hazard recognizer
; GCN-O1-OPTS-NEXT: AMDGPU Insert waits for SGPR read hazards
-; GCN-O1-OPTS-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O1-OPTS-NEXT: AMDGPU Insert Delay ALU
; GCN-O1-OPTS-NEXT: Branch relaxation pass
; GCN-O1-OPTS-NEXT: Register Usage Information Collector Pass
@@ -1079,9 +1079,9 @@
; GCN-O2-NEXT: SI Insert Hard Clauses
; GCN-O2-NEXT: SI Final Branch Preparation
; GCN-O2-NEXT: SI peephole optimizations
+; GCN-O2-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O2-NEXT: Post RA hazard recognizer
; GCN-O2-NEXT: AMDGPU Insert waits for SGPR read hazards
-; GCN-O2-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O2-NEXT: AMDGPU Insert Delay ALU
; GCN-O2-NEXT: Branch relaxation pass
; GCN-O2-NEXT: Register Usage Information Collector Pass
@@ -1416,9 +1416,9 @@
; GCN-O3-NEXT: SI Insert Hard Clauses
; GCN-O3-NEXT: SI Final Branch Preparation
; GCN-O3-NEXT: SI peephole optimizations
+; GCN-O3-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O3-NEXT: Post RA hazard recognizer
; GCN-O3-NEXT: AMDGPU Insert waits for SGPR read hazards
-; GCN-O3-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O3-NEXT: AMDGPU Insert Delay ALU
; GCN-O3-NEXT: Branch relaxation pass
; GCN-O3-NEXT: Register Usage Information Collector Pass
>From 6a0a8d7ec09fae2819427e17d7d4a8a24d6afad1 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Wed, 4 Mar 2026 19:42:17 -0500
Subject: [PATCH 2/2] [AMDGPU] Fix GFX1250 hazard: S_SET_VGPR_MSB dropped after
S_SETREG_IMM32_B32 (MODE)
On GFX1250, S_SET_VGPR_MSB is silently dropped when immediately following
S_SETREG_IMM32_B32 targeting the MODE register.
For Case 2 (size > 12), where imm32[12:19] is part of the MODE value and
cannot be freely modified, AMDGPULowerVGPREncoding may place
S_SET_VGPR_MSB directly after the setreg. Because that pass now runs
before the hazard recognizers (see patch 1/2), GCNHazardRecognizer can
detect the resulting sequence rather than having to predict it.
The new fix (fixSetRegModeToVGPRMSBHazard) works as follows:
- It only runs on subtargets with 1024 addressable VGPRs
- It only considers S_SETREG_IMM32_B32 whose hwreg id is MODE; a setreg
  targeting any other hardware register is left untouched
- It skips meta instructions to find the next real instruction in the
  block; if that instruction is S_SET_VGPR_MSB, a single S_NOP 0 is
  inserted immediately before it
The new test hazard-setreg-vgpr-msb-gfx1250.mir and the existing
vgpr-setreg-mode-swar.mir run amdgpu-lower-vgpr-encoding followed by
post-RA-hazard-rec to check the combined result.
---
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 24 ++++
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h | 1 +
.../AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir | 120 ++++++++++++++++++
.../CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir | 4 +-
4 files changed, 148 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 30a9d1d2ab149..2b81d0689f9b6 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1308,6 +1308,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
fixScratchBaseForwardingHazard(MI);
if (ST.setRegModeNeedsVNOPs())
fixSetRegMode(MI);
+ if (ST.has1024AddressableVGPRs())
+ fixSetRegModeToVGPRMSBHazard(MI);
}
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3865,3 +3867,25 @@ bool GCNHazardRecognizer::fixSetRegMode(MachineInstr *MI) {
BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII.get(AMDGPU::V_NOP_e32));
return true;
}
+
+bool GCNHazardRecognizer::fixSetRegModeToVGPRMSBHazard(MachineInstr *MI) {
+ if (MI->getOpcode() != AMDGPU::S_SETREG_IMM32_B32)
+ return false;
+
+ auto [Id, Offset, Width] =
+ AMDGPU::Hwreg::HwregEncoding::decode(MI->getOperand(1).getImm());
+ (void)Offset;
+ (void)Width;
+ if (Id != AMDGPU::Hwreg::ID_MODE)
+ return false;
+
+ MachineBasicBlock *MBB = MI->getParent();
+ auto Next = std::next(MI->getIterator());
+ while (Next != MBB->instr_end() && Next->isMetaInstruction())
+ ++Next;
+ if (Next == MBB->instr_end() || Next->getOpcode() != AMDGPU::S_SET_VGPR_MSB)
+ return false;
+
+ BuildMI(*MBB, Next, MI->getDebugLoc(), TII.get(AMDGPU::S_NOP)).addImm(0);
+ return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index b331504d40113..52b54e8581f6b 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -137,6 +137,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
bool fixScratchBaseForwardingHazard(MachineInstr *MI);
bool fixSetRegMode(MachineInstr *MI);
+ bool fixSetRegModeToVGPRMSBHazard(MachineInstr *MI);
int checkMAIHazards(MachineInstr *MI);
int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir
new file mode 100644
index 0000000000000..a80d67ac8212b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hazard-setreg-vgpr-msb-gfx1250.mir
@@ -0,0 +1,120 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding,post-RA-hazard-rec -o - %s | FileCheck %s
+
+# Test handling of the GFX1250 hardware hazard where S_SET_VGPR_MSB immediately
+# after S_SETREG_IMM32_B32 (MODE) is silently dropped.
+#
+# AMDGPULowerVGPREncoding may place S_SET_VGPR_MSB after S_SETREG_IMM32_B32
+# (MODE) in Case 2 (size > 12). GCNHazardRecognizer then detects this sequence
+# and inserts S_NOPs between them to prevent the hazard.
+
+---
+# Case 2 mismatch: setreg (size=16) with imm32[12:19] that doesn't match
+# current VGPR MSB. AMDGPULowerVGPREncoding inserts S_SET_VGPR_MSB after the
+# setreg; GCNHazardRecognizer inserts S_NOP between them.
+name: setreg_mode_size_gt_12_mismatch
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: setreg_mode_size_gt_12_mismatch
+ ; CHECK: S_SET_VGPR_MSB 64, implicit-def $mode
+ ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: S_SET_VGPR_MSB 64, implicit-def $mode
+ ; CHECK-NEXT: S_ENDPGM 0
+ $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+ ; imm32 = 0x23ABC = 146108 (bits 12:19 = 0x23, doesn't match VGPR MSB mode)
+ S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ S_ENDPGM 0
+...
+
+---
+# Case 2 with different next MSB: setreg (size=16) with imm32[12:19] that
+# doesn't match current VGPR MSB. S_SET_VGPR_MSB is inserted to restore
+# current mode, then another S_SET_VGPR_MSB for the next VALU (v512/v513).
+# GCNHazardRecognizer inserts S_NOP between setreg and S_SET_VGPR_MSB.
+name: setreg_mode_size_gt_12_matches_next
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: setreg_mode_size_gt_12_matches_next
+ ; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
+ ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $vgpr257, implicit $exec
+ ; CHECK-NEXT: S_SETREG_IMM32_B32 43708, 30721, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
+ ; CHECK-NEXT: S_SET_VGPR_MSB 16770, implicit-def $mode
+ ; CHECK-NEXT: $vgpr512 = V_MOV_B32_e32 undef $vgpr513, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ $vgpr256 = V_MOV_B32_e32 undef $vgpr257, implicit $exec
+ ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+ ; imm32 = 0xAABC = 43708 (bits 12:19 = 0xA = 10, matches next MSB for v512/v513)
+ S_SETREG_IMM32_B32 43708, 30721, implicit-def $mode, implicit $mode
+ $vgpr512 = V_MOV_B32_e32 undef $vgpr513, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+# No hazard: S_SETREG_IMM32_B32 targeting non-MODE register.
+name: setreg_non_mode_no_hazard
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: setreg_non_mode_no_hazard
+ ; CHECK: S_SET_VGPR_MSB 64, implicit-def $mode
+ ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; CHECK-NEXT: S_SETREG_IMM32_B32 0, 2178, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: S_SET_VGPR_MSB 16384, implicit-def $mode
+ ; CHECK-NEXT: $vgpr0 = V_ADD_F32_e32 undef $vgpr1, undef $vgpr2, implicit $exec, implicit $mode
+ ; CHECK-NEXT: S_ENDPGM 0
+ $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; hwreg(STATUS, 2, 2): simm16 = 2 | (2 << 6) | (1 << 11) = 0x882 = 2178
+ S_SETREG_IMM32_B32 0, 2178, implicit-def $mode, implicit $mode
+ $vgpr0 = V_ADD_F32_e32 undef $vgpr1, undef $vgpr2, implicit $exec, implicit $mode
+ S_ENDPGM 0
+...
+
+---
+# Case 2 but no high VGPRs before setreg. The lowering pass still inserts
+# S_SET_VGPR_MSB 0 (redundant) and the hazard recognizer inserts S_NOP.
+name: setreg_mode_size_gt_12_no_high_vgpr
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: setreg_mode_size_gt_12_no_high_vgpr
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: S_SET_VGPR_MSB 0, implicit-def $mode
+ ; CHECK-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+ S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ S_ENDPGM 0
+...
+
+---
+# Case 2 with high VGPR only AFTER setreg: setreg (size=16) with low VGPRs
+# before but high VGPRs after. AMDGPULowerVGPREncoding's setMode inserts
+# S_SET_VGPR_MSB before the next VALU (which is right after setreg).
+# GCNHazardRecognizer detects the setreg + S_SET_VGPR_MSB and inserts S_NOP.
+name: setreg_mode_size_gt_12_high_vgpr_after
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: setreg_mode_size_gt_12_high_vgpr_after
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: S_NOP 0
+ ; CHECK-NEXT: S_SET_VGPR_MSB 0, implicit-def $mode
+ ; CHECK-NEXT: S_SET_VGPR_MSB 64, implicit-def $mode
+ ; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
+ $vgpr0 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ ; hwreg(MODE, 0, 16): simm16 = 0x7801 = 30721
+ S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ $vgpr256 = V_MOV_B32_e32 undef $sgpr0, implicit $exec
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir b/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir
index ecfc3cdcd215c..6559204f8feeb 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-setreg-mode-swar.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -run-pass=amdgpu-lower-vgpr-encoding,post-RA-hazard-rec -o - %s | FileCheck %s
---
# Case 1a: Size < 12 (size=4), imm32[12:19]=0
@@ -94,6 +94,7 @@ body: |
; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 146108, 30721, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: S_ENDPGM 0
$vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
@@ -231,6 +232,7 @@ body: |
; CHECK: S_SET_VGPR_MSB 65, implicit-def $mode
; CHECK-NEXT: $vgpr256 = V_MOV_B32_e32 $vgpr257, implicit $exec
; CHECK-NEXT: S_SETREG_IMM32_B32 23228, 30721, implicit-def $mode, implicit $mode
+ ; CHECK-NEXT: S_NOP 0
; CHECK-NEXT: S_SET_VGPR_MSB 16770, implicit-def $mode
; CHECK-NEXT: $vgpr512 = V_MOV_B32_e32 $vgpr513, implicit $exec
; CHECK-NEXT: S_ENDPGM 0
More information about the llvm-commits
mailing list