[llvm-branch-commits] [llvm] [AMDGPU][NPM] Complete fast regalloc pipeline (PR #174096)
Vikram Hegde via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Jan 18 21:50:37 PST 2026
https://github.com/vikramRH updated https://github.com/llvm/llvm-project/pull/174096
>From 10ceffb21660dec8c0b23c7bbb4a0e3aa02cd4f1 Mon Sep 17 00:00:00 2001
From: vikhegde <vikram.hegde at amd.com>
Date: Wed, 31 Dec 2025 19:38:57 +0530
Subject: [PATCH] [AMDGPU][NPM] Complete fast regalloc pipeline
---
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 38 +++++++++++++++++++
llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 11 +++++-
2 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index d25b22b2b96dc..f8a83e72bc3ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -154,8 +154,10 @@ class AMDGPUCodeGenPassBuilder
void addPostRegAlloc(PassManagerWrapper &PMW) const;
void addPreEmitPass(PassManagerWrapper &PMWM) const;
void addPreEmitRegAlloc(PassManagerWrapper &PMW) const;
+ Error addRegAssignmentFast(PassManagerWrapper &PMW) const;
Error addRegAssignmentOptimized(PassManagerWrapper &PMW) const;
void addPreRegAlloc(PassManagerWrapper &PMW) const;
+ Error addFastRegAlloc(PassManagerWrapper &PMW) const;
void addOptimizedRegAlloc(PassManagerWrapper &PMW) const;
void addPreSched2(PassManagerWrapper &PMW) const;
void addPostBBSections(PassManagerWrapper &PMW) const;
@@ -2311,6 +2313,42 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
addMachineFunctionPass(SIShrinkInstructionsPass(), PMW);
}
+Error AMDGPUCodeGenPassBuilder::addFastRegAlloc(PassManagerWrapper &PMW) const {
+ insertPass<PHIEliminationPass>(SILowerControlFlowPass());
+
+ insertPass<TwoAddressInstructionPass>(SIWholeQuadModePass());
+
+ return Base::addFastRegAlloc(PMW);
+}
+
+Error AMDGPUCodeGenPassBuilder::addRegAssignmentFast(
+ PassManagerWrapper &PMW) const {
+ // TODO: Handle the error case where the default regalloc is overridden (with regalloc-npm).
+
+ addMachineFunctionPass(GCNPreRALongBranchRegPass(), PMW);
+
+ addMachineFunctionPass(RegAllocFastPass({onlyAllocateSGPRs, "sgpr", false}),
+ PMW);
+
+ // Equivalent of PEI for SGPRs.
+ addMachineFunctionPass(SILowerSGPRSpillsPass(), PMW);
+
+ // Allocate the wwm registers used in whole quad mode operations (for shaders).
+ addMachineFunctionPass(SIPreAllocateWWMRegsPass(), PMW);
+
+ // For allocating other wwm register operands.
+ addMachineFunctionPass(RegAllocFastPass({onlyAllocateWWMRegs, "wwm", false}),
+ PMW);
+
+ addMachineFunctionPass(SILowerWWMCopiesPass(), PMW);
+ addMachineFunctionPass(AMDGPUReserveWWMRegsPass(), PMW);
+
+ // For allocating per-thread VGPRs.
+ addMachineFunctionPass(RegAllocFastPass({onlyAllocateVGPRs, "vgpr"}), PMW);
+
+ return Error::success();
+}
+
void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc(
PassManagerWrapper &PMW) const {
if (EnableDCEInRA)
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index ae64d33cc0b43..953f7e1a5e3c7 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -58,8 +58,17 @@
; GCN-O0-NEXT:require<reg-usage>
; GCN-O0-NEXT:cgscc(function(machine-function(reg-usage-propagation
; GCN-O0-NEXT:phi-node-elimination
+; GCN-O0-NEXT:si-lower-control-flow
; GCN-O0-NEXT:two-address-instruction
-; GCN-O0-NEXT:regallocfast
+; GCN-O0-NEXT:si-wqm
+; GCN-O0-NEXT:amdgpu-pre-ra-long-branch-reg
+; GCN-O0-NEXT:regallocfast<filter=sgpr;no-clear-vregs>
+; GCN-O0-NEXT:si-lower-sgpr-spills
+; GCN-O0-NEXT:si-pre-allocate-wwm-regs
+; GCN-O0-NEXT:regallocfast<filter=wwm;no-clear-vregs>
+; GCN-O0-NEXT:si-lower-wwm-copies
+; GCN-O0-NEXT:amdgpu-reserve-wwm-regs
+; GCN-O0-NEXT:regallocfast<filter=vgpr>
; GCN-O0-NEXT:si-fix-vgpr-copies
; GCN-O0-NEXT:remove-redundant-debug-values
; GCN-O0-NEXT:fixup-statepoint-caller-saved
More information about the llvm-branch-commits
mailing list