[llvm] [AMDGPU] Run AMDGPULowerVGPREncoding before hazard recognizers. NFCI. (PR #184987)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 6 02:57:41 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
Running AMDGPULowerVGPREncoding before the hazard recognizers gives them
the opportunity to fix hazards introduced by the VGPR lowering pass.
---
Full diff: https://github.com/llvm/llvm-project/pull/184987.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+5-3)
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll (+3-3)
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+5-5)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5b3effbcc7179..ad5e158b6620c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1859,6 +1859,9 @@ void GCNPassConfig::addPreEmitPass() {
addPass(createAMDGPUSetWavePriorityPass());
if (getOptLevel() > CodeGenOptLevel::None)
addPass(&SIPreEmitPeepholeID);
+
+ addPass(&AMDGPULowerVGPREncodingLegacyID);
+
// The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able handle all hazards correctly. This is because if there
// are multiple scheduling regions in a basic block, the regions are scheduled
@@ -1871,8 +1874,6 @@ void GCNPassConfig::addPreEmitPass() {
addPass(&AMDGPUWaitSGPRHazardsLegacyID);
- addPass(&AMDGPULowerVGPREncodingLegacyID);
-
if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
addPass(&AMDGPUInsertDelayAluID);
@@ -2575,6 +2576,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(PassManagerWrapper &PMW) const {
if (TM.getOptLevel() > CodeGenOptLevel::None)
addMachineFunctionPass(SIPreEmitPeepholePass(), PMW);
+ addMachineFunctionPass(AMDGPULowerVGPREncodingPass(), PMW);
+
// The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able handle all hazards correctly. This is because if there
// are multiple scheduling regions in a basic block, the regions are scheduled
@@ -2585,7 +2588,6 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(PassManagerWrapper &PMW) const {
// cases.
addMachineFunctionPass(PostRAHazardRecognizerPass(), PMW);
addMachineFunctionPass(AMDGPUWaitSGPRHazardsPass(), PMW);
- addMachineFunctionPass(AMDGPULowerVGPREncodingPass(), PMW);
if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less)) {
addMachineFunctionPass(AMDGPUInsertDelayAluPass(), PMW);
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 094ffe2b79715..af1e74539d8ac 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -81,9 +81,9 @@
; GCN-O0-NEXT: si-insert-waitcnts
; GCN-O0-NEXT: si-mode-register
; GCN-O0-NEXT: si-late-branch-lowering
+; GCN-O0-NEXT: amdgpu-lower-vgpr-encoding
; GCN-O0-NEXT: post-RA-hazard-rec
; GCN-O0-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O0-NEXT: amdgpu-lower-vgpr-encoding
; GCN-O0-NEXT: branch-relaxation
; GCN-O0-NEXT: reg-usage-collector
; GCN-O0-NEXT: remove-loads-into-fake-uses
@@ -251,9 +251,9 @@
; GCN-O2-NEXT: si-insert-hard-clauses
; GCN-O2-NEXT: si-late-branch-lowering
; GCN-O2-NEXT: si-pre-emit-peephole
+; GCN-O2-NEXT: amdgpu-lower-vgpr-encoding
; GCN-O2-NEXT: post-RA-hazard-rec
; GCN-O2-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O2-NEXT: amdgpu-lower-vgpr-encoding
; GCN-O2-NEXT: amdgpu-insert-delay-alu
; GCN-O2-NEXT: branch-relaxation
; GCN-O2-NEXT: reg-usage-collector
@@ -422,9 +422,9 @@
; GCN-O3-NEXT: si-insert-hard-clauses
; GCN-O3-NEXT: si-late-branch-lowering
; GCN-O3-NEXT: si-pre-emit-peephole
+; GCN-O3-NEXT: amdgpu-lower-vgpr-encoding
; GCN-O3-NEXT: post-RA-hazard-rec
; GCN-O3-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O3-NEXT: amdgpu-lower-vgpr-encoding
; GCN-O3-NEXT: amdgpu-insert-delay-alu
; GCN-O3-NEXT: branch-relaxation
; GCN-O3-NEXT: reg-usage-collector
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index abb6ccc5faadb..f940cbcb6401d 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -145,9 +145,9 @@
; GCN-O0-NEXT: SI insert wait instructions
; GCN-O0-NEXT: Insert required mode register values
; GCN-O0-NEXT: SI Final Branch Preparation
+; GCN-O0-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O0-NEXT: Post RA hazard recognizer
; GCN-O0-NEXT: AMDGPU Insert waits for SGPR read hazards
-; GCN-O0-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O0-NEXT: Branch relaxation pass
; GCN-O0-NEXT: Register Usage Information Collector Pass
; GCN-O0-NEXT: Remove Loads Into Fake Uses
@@ -436,9 +436,9 @@
; GCN-O1-NEXT: SI Insert Hard Clauses
; GCN-O1-NEXT: SI Final Branch Preparation
; GCN-O1-NEXT: SI peephole optimizations
+; GCN-O1-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O1-NEXT: Post RA hazard recognizer
; GCN-O1-NEXT: AMDGPU Insert waits for SGPR read hazards
-; GCN-O1-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O1-NEXT: AMDGPU Insert Delay ALU
; GCN-O1-NEXT: Branch relaxation pass
; GCN-O1-NEXT: Register Usage Information Collector Pass
@@ -755,9 +755,9 @@
; GCN-O1-OPTS-NEXT: SI Insert Hard Clauses
; GCN-O1-OPTS-NEXT: SI Final Branch Preparation
; GCN-O1-OPTS-NEXT: SI peephole optimizations
+; GCN-O1-OPTS-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O1-OPTS-NEXT: Post RA hazard recognizer
; GCN-O1-OPTS-NEXT: AMDGPU Insert waits for SGPR read hazards
-; GCN-O1-OPTS-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O1-OPTS-NEXT: AMDGPU Insert Delay ALU
; GCN-O1-OPTS-NEXT: Branch relaxation pass
; GCN-O1-OPTS-NEXT: Register Usage Information Collector Pass
@@ -1079,9 +1079,9 @@
; GCN-O2-NEXT: SI Insert Hard Clauses
; GCN-O2-NEXT: SI Final Branch Preparation
; GCN-O2-NEXT: SI peephole optimizations
+; GCN-O2-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O2-NEXT: Post RA hazard recognizer
; GCN-O2-NEXT: AMDGPU Insert waits for SGPR read hazards
-; GCN-O2-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O2-NEXT: AMDGPU Insert Delay ALU
; GCN-O2-NEXT: Branch relaxation pass
; GCN-O2-NEXT: Register Usage Information Collector Pass
@@ -1416,9 +1416,9 @@
; GCN-O3-NEXT: SI Insert Hard Clauses
; GCN-O3-NEXT: SI Final Branch Preparation
; GCN-O3-NEXT: SI peephole optimizations
+; GCN-O3-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O3-NEXT: Post RA hazard recognizer
; GCN-O3-NEXT: AMDGPU Insert waits for SGPR read hazards
-; GCN-O3-NEXT: AMDGPU Lower VGPR Encoding
; GCN-O3-NEXT: AMDGPU Insert Delay ALU
; GCN-O3-NEXT: Branch relaxation pass
; GCN-O3-NEXT: Register Usage Information Collector Pass
``````````
</details>
https://github.com/llvm/llvm-project/pull/184987
More information about the llvm-commits
mailing list