[llvm] [AMDGPU] Run AMDGPULowerVGPREncoding before hazard recognizers. NFCI. (PR #184987)

Fri Mar 6 02:57:41 PST 2026

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

<details>
<summary>Changes</summary>

Running AMDGPULowerVGPREncoding before the hazard recognizers gives us the
opportunity to fix hazards introduced by the VGPR lowering pass.


---
Full diff: https://github.com/llvm/llvm-project/pull/184987.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+5-3) 
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+5-5) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5b3effbcc7179..ad5e158b6620c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1859,6 +1859,9 @@ void GCNPassConfig::addPreEmitPass() {
     addPass(createAMDGPUSetWavePriorityPass());
   if (getOptLevel() > CodeGenOptLevel::None)
     addPass(&SIPreEmitPeepholeID);
+
+  addPass(&AMDGPULowerVGPREncodingLegacyID);
+
   // The hazard recognizer that runs as part of the post-ra scheduler does not
   // guarantee to be able handle all hazards correctly. This is because if there
   // are multiple scheduling regions in a basic block, the regions are scheduled
@@ -1871,8 +1874,6 @@ void GCNPassConfig::addPreEmitPass() {
 
   addPass(&AMDGPUWaitSGPRHazardsLegacyID);
 
-  addPass(&AMDGPULowerVGPREncodingLegacyID);
-
   if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less))
     addPass(&AMDGPUInsertDelayAluID);
 
@@ -2575,6 +2576,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(PassManagerWrapper &PMW) const {
   if (TM.getOptLevel() > CodeGenOptLevel::None)
     addMachineFunctionPass(SIPreEmitPeepholePass(), PMW);
 
+  addMachineFunctionPass(AMDGPULowerVGPREncodingPass(), PMW);
+
   // The hazard recognizer that runs as part of the post-ra scheduler does not
   // guarantee to be able handle all hazards correctly. This is because if there
   // are multiple scheduling regions in a basic block, the regions are scheduled
@@ -2585,7 +2588,6 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(PassManagerWrapper &PMW) const {
   // cases.
   addMachineFunctionPass(PostRAHazardRecognizerPass(), PMW);
   addMachineFunctionPass(AMDGPUWaitSGPRHazardsPass(), PMW);
-  addMachineFunctionPass(AMDGPULowerVGPREncodingPass(), PMW);
 
   if (isPassEnabled(EnableInsertDelayAlu, CodeGenOptLevel::Less)) {
     addMachineFunctionPass(AMDGPUInsertDelayAluPass(), PMW);
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 094ffe2b79715..af1e74539d8ac 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -81,9 +81,9 @@
 ; GCN-O0-NEXT: si-insert-waitcnts
 ; GCN-O0-NEXT: si-mode-register
 ; GCN-O0-NEXT: si-late-branch-lowering
+; GCN-O0-NEXT: amdgpu-lower-vgpr-encoding
 ; GCN-O0-NEXT: post-RA-hazard-rec
 ; GCN-O0-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O0-NEXT: amdgpu-lower-vgpr-encoding
 ; GCN-O0-NEXT: branch-relaxation
 ; GCN-O0-NEXT: reg-usage-collector
 ; GCN-O0-NEXT: remove-loads-into-fake-uses
@@ -251,9 +251,9 @@
 ; GCN-O2-NEXT: si-insert-hard-clauses
 ; GCN-O2-NEXT: si-late-branch-lowering
 ; GCN-O2-NEXT: si-pre-emit-peephole
+; GCN-O2-NEXT: amdgpu-lower-vgpr-encoding
 ; GCN-O2-NEXT: post-RA-hazard-rec
 ; GCN-O2-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O2-NEXT: amdgpu-lower-vgpr-encoding
 ; GCN-O2-NEXT: amdgpu-insert-delay-alu
 ; GCN-O2-NEXT: branch-relaxation
 ; GCN-O2-NEXT: reg-usage-collector
@@ -422,9 +422,9 @@
 ; GCN-O3-NEXT: si-insert-hard-clauses
 ; GCN-O3-NEXT: si-late-branch-lowering
 ; GCN-O3-NEXT: si-pre-emit-peephole
+; GCN-O3-NEXT: amdgpu-lower-vgpr-encoding
 ; GCN-O3-NEXT: post-RA-hazard-rec
 ; GCN-O3-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O3-NEXT: amdgpu-lower-vgpr-encoding
 ; GCN-O3-NEXT: amdgpu-insert-delay-alu
 ; GCN-O3-NEXT: branch-relaxation
 ; GCN-O3-NEXT: reg-usage-collector
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index abb6ccc5faadb..f940cbcb6401d 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -145,9 +145,9 @@
 ; GCN-O0-NEXT:        SI insert wait instructions
 ; GCN-O0-NEXT:        Insert required mode register values
 ; GCN-O0-NEXT:        SI Final Branch Preparation
+; GCN-O0-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O0-NEXT:        Post RA hazard recognizer
 ; GCN-O0-NEXT:        AMDGPU Insert waits for SGPR read hazards
-; GCN-O0-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O0-NEXT:        Branch relaxation pass
 ; GCN-O0-NEXT:        Register Usage Information Collector Pass
 ; GCN-O0-NEXT:        Remove Loads Into Fake Uses
@@ -436,9 +436,9 @@
 ; GCN-O1-NEXT:        SI Insert Hard Clauses
 ; GCN-O1-NEXT:        SI Final Branch Preparation
 ; GCN-O1-NEXT:        SI peephole optimizations
+; GCN-O1-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O1-NEXT:        Post RA hazard recognizer
 ; GCN-O1-NEXT:        AMDGPU Insert waits for SGPR read hazards
-; GCN-O1-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O1-NEXT:        AMDGPU Insert Delay ALU
 ; GCN-O1-NEXT:        Branch relaxation pass
 ; GCN-O1-NEXT:        Register Usage Information Collector Pass
@@ -755,9 +755,9 @@
 ; GCN-O1-OPTS-NEXT:        SI Insert Hard Clauses
 ; GCN-O1-OPTS-NEXT:        SI Final Branch Preparation
 ; GCN-O1-OPTS-NEXT:        SI peephole optimizations
+; GCN-O1-OPTS-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O1-OPTS-NEXT:        Post RA hazard recognizer
 ; GCN-O1-OPTS-NEXT:        AMDGPU Insert waits for SGPR read hazards
-; GCN-O1-OPTS-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O1-OPTS-NEXT:        AMDGPU Insert Delay ALU
 ; GCN-O1-OPTS-NEXT:        Branch relaxation pass
 ; GCN-O1-OPTS-NEXT:        Register Usage Information Collector Pass
@@ -1079,9 +1079,9 @@
 ; GCN-O2-NEXT:        SI Insert Hard Clauses
 ; GCN-O2-NEXT:        SI Final Branch Preparation
 ; GCN-O2-NEXT:        SI peephole optimizations
+; GCN-O2-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O2-NEXT:        Post RA hazard recognizer
 ; GCN-O2-NEXT:        AMDGPU Insert waits for SGPR read hazards
-; GCN-O2-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O2-NEXT:        AMDGPU Insert Delay ALU
 ; GCN-O2-NEXT:        Branch relaxation pass
 ; GCN-O2-NEXT:        Register Usage Information Collector Pass
@@ -1416,9 +1416,9 @@
 ; GCN-O3-NEXT:        SI Insert Hard Clauses
 ; GCN-O3-NEXT:        SI Final Branch Preparation
 ; GCN-O3-NEXT:        SI peephole optimizations
+; GCN-O3-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O3-NEXT:        Post RA hazard recognizer
 ; GCN-O3-NEXT:        AMDGPU Insert waits for SGPR read hazards
-; GCN-O3-NEXT:        AMDGPU Lower VGPR Encoding
 ; GCN-O3-NEXT:        AMDGPU Insert Delay ALU
 ; GCN-O3-NEXT:        Branch relaxation pass
 ; GCN-O3-NEXT:        Register Usage Information Collector Pass

``````````

</details>


https://github.com/llvm/llvm-project/pull/184987