[llvm] a03f82d - [AMDGPU][NPM] Add target-specific register allocation options (#178889)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 3 21:57:52 PST 2026
Author: Teja Alaghari
Date: 2026-02-04T11:27:47+05:30
New Revision: a03f82d7e582078163bce7ecaf960e8a53fb4744
URL: https://github.com/llvm/llvm-project/commit/a03f82d7e582078163bce7ecaf960e8a53fb4744
DIFF: https://github.com/llvm/llvm-project/commit/a03f82d7e582078163bce7ecaf960e8a53fb4744.diff
LOG: [AMDGPU][NPM] Add target-specific register allocation options (#178889)
Add the following AMDGPU-specific options to select the register allocator
for SGPR, WWM, and VGPR registers under the new pass manager (NPM):
- `-sgpr-regalloc-npm`
- `-wwm-regalloc-npm`
- `-vgpr-regalloc-npm`
Added:
Modified:
llvm/include/llvm/Passes/CodeGenPassBuilder.h
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
Removed:
################################################################################
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 6942fc42ca721..68d13fefd7d8b 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -472,7 +472,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
/// addOptimizedRegAlloc - Add passes related to register allocation.
/// CodeGenTargetMachineImpl provides standard regalloc passes for most
/// targets.
- void addOptimizedRegAlloc(PassManagerWrapper &PMW) const;
+ Error addOptimizedRegAlloc(PassManagerWrapper &PMW) const;
/// Add passes that optimize machine instructions after register allocation.
void addMachineLateOptimization(PassManagerWrapper &PMW) const;
@@ -505,10 +505,10 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
/// regalloc pass.
void addRegAllocPass(PassManagerWrapper &PMW, bool Optimized) const;
- /// Add core register alloator passes which do the actual register assignment
- /// and rewriting. \returns true if any passes were added.
+ /// Add core register allocator passes which do the actual register assignment
+ /// and rewriting.
Error addRegAssignmentFast(PassManagerWrapper &PMW) const;
- Error addRegAssignmentOptimized(PassManagerWrapper &PMWM) const;
+ Error addRegAssignmentOptimized(PassManagerWrapper &PMW) const;
/// Allow the target to disable a specific pass by default.
/// Backend can declare unwanted passes in constructor.
@@ -977,12 +977,9 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::addMachinePasses(
// Run register allocation and passes that are tightly coupled with it,
// including phi elimination and scheduling.
- if (*Opt.OptimizeRegAlloc) {
- derived().addOptimizedRegAlloc(PMW);
- } else {
- if (auto Err = derived().addFastRegAlloc(PMW))
- return Err;
- }
+ if (auto Err = *Opt.OptimizeRegAlloc ? derived().addOptimizedRegAlloc(PMW)
+ : derived().addFastRegAlloc(PMW))
+ return std::move(Err);
// Run post-ra passes.
derived().addPostRegAlloc(PMW);
@@ -1212,7 +1209,7 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::addFastRegAlloc(
/// optimized register allocation, including coalescing, machine instruction
/// scheduling, and register allocation itself.
template <typename Derived, typename TargetMachineT>
-void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
+Error CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
PassManagerWrapper &PMW) const {
addMachineFunctionPass(DetectDeadLanesPass(), PMW);
@@ -1255,10 +1252,8 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
// PreRA instruction scheduling.
addMachineFunctionPass(MachineSchedulerPass(&TM), PMW);
- if (auto E = derived().addRegAssignmentOptimized(PMW)) {
- // addRegAssignmentOptimized did not add a reg alloc pass, so do nothing.
- return;
- }
+ if (auto E = derived().addRegAssignmentOptimized(PMW))
+ return std::move(E);
addMachineFunctionPass(StackSlotColoringPass(), PMW);
@@ -1274,6 +1269,8 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
//
// FIXME: can this move into MachineLateOptimization?
addMachineFunctionPass(MachineLICMPass(), PMW);
+
+ return Error::success();
}
//===---------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7881623bcedd3..49c60c254f6f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -159,10 +159,14 @@ class AMDGPUCodeGenPassBuilder
Error addRegAssignmentOptimized(PassManagerWrapper &PMW) const;
void addPreRegAlloc(PassManagerWrapper &PMW) const;
Error addFastRegAlloc(PassManagerWrapper &PMW) const;
- void addOptimizedRegAlloc(PassManagerWrapper &PMW) const;
+ Error addOptimizedRegAlloc(PassManagerWrapper &PMW) const;
void addPreSched2(PassManagerWrapper &PMW) const;
void addPostBBSections(PassManagerWrapper &PMW) const;
+private:
+ Error validateRegAllocOptions() const;
+
+public:
/// Check if a pass is enabled given \p Opt option. The option always
/// overrides defaults if explicitly used. Otherwise its default will be used
/// given that a pass shall work at an optimization \p Level minimum.
@@ -244,6 +248,63 @@ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false,
cl::init(&useDefaultRegisterAllocator),
cl::desc("Register allocator to use for WWM registers"));
+// New pass manager register allocator options for AMDGPU
+static cl::opt<RegAllocType, false, RegAllocTypeParser> SGPRRegAllocNPM(
+ "sgpr-regalloc-npm", cl::Hidden, cl::init(RegAllocType::Default),
+ cl::desc("Register allocator for SGPRs (new pass manager)"));
+
+static cl::opt<RegAllocType, false, RegAllocTypeParser> VGPRRegAllocNPM(
+ "vgpr-regalloc-npm", cl::Hidden, cl::init(RegAllocType::Default),
+ cl::desc("Register allocator for VGPRs (new pass manager)"));
+
+static cl::opt<RegAllocType, false, RegAllocTypeParser> WWMRegAllocNPM(
+ "wwm-regalloc-npm", cl::Hidden, cl::init(RegAllocType::Default),
+ cl::desc("Register allocator for WWM registers (new pass manager)"));
+
+/// Check if the given RegAllocType is supported for AMDGPU NPM register
+/// allocation. Only Fast and Greedy are supported; Basic and PBQP are not.
+static Error checkRegAllocSupported(RegAllocType RAType, StringRef RegName) {
+ if (RAType == RegAllocType::Basic || RAType == RegAllocType::PBQP) {
+ return make_error<StringError>(
+ Twine("unsupported register allocator '") +
+ (RAType == RegAllocType::Basic ? "basic" : "pbqp") + "' for " +
+ RegName + " registers",
+ inconvertibleErrorCode());
+ }
+ return Error::success();
+}
+
+Error AMDGPUCodeGenPassBuilder::validateRegAllocOptions() const {
+ // 1. Generic --regalloc-npm is not supported for AMDGPU.
+ if (Opt.RegAlloc != RegAllocType::Unset) {
+ return make_error<StringError>(
+ "-regalloc-npm not supported for amdgcn. Use -sgpr-regalloc-npm, "
+ "-vgpr-regalloc-npm, and -wwm-regalloc-npm",
+ inconvertibleErrorCode());
+ }
+
+ // 2. Legacy PM regalloc options are not compatible with NPM.
+ if (SGPRRegAlloc.getNumOccurrences() > 0 ||
+ VGPRRegAlloc.getNumOccurrences() > 0 ||
+ WWMRegAlloc.getNumOccurrences() > 0) {
+ return make_error<StringError>(
+ "-sgpr-regalloc, -vgpr-regalloc, and -wwm-regalloc are legacy PM "
+ "options. Use -sgpr-regalloc-npm, -vgpr-regalloc-npm, and "
+ "-wwm-regalloc-npm with the new pass manager",
+ inconvertibleErrorCode());
+ }
+
+ // 3. Only Fast and Greedy allocators are supported for AMDGPU.
+ if (auto Err = checkRegAllocSupported(SGPRRegAllocNPM, "SGPR"))
+ return Err;
+ if (auto Err = checkRegAllocSupported(WWMRegAllocNPM, "WWM"))
+ return Err;
+ if (auto Err = checkRegAllocSupported(VGPRRegAllocNPM, "VGPR"))
+ return Err;
+
+ return Error::success();
+}
+
static void initializeDefaultSGPRRegisterAllocatorOnce() {
RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
@@ -2327,12 +2388,17 @@ Error AMDGPUCodeGenPassBuilder::addFastRegAlloc(PassManagerWrapper &PMW) const {
Error AMDGPUCodeGenPassBuilder::addRegAssignmentFast(
PassManagerWrapper &PMW) const {
- // TODO: handle default regalloc override error (with regalloc-npm)
+ if (auto Err = validateRegAllocOptions())
+ return Err;
addMachineFunctionPass(GCNPreRALongBranchRegPass(), PMW);
- addMachineFunctionPass(RegAllocFastPass({onlyAllocateSGPRs, "sgpr", false}),
- PMW);
+ // SGPR allocation - default to fast at -O0.
+ if (SGPRRegAllocNPM == RegAllocType::Greedy)
+ addMachineFunctionPass(RAGreedyPass({onlyAllocateSGPRs, "sgpr"}), PMW);
+ else
+ addMachineFunctionPass(RegAllocFastPass({onlyAllocateSGPRs, "sgpr", false}),
+ PMW);
// Equivalent of PEI for SGPRs.
addMachineFunctionPass(SILowerSGPRSpillsPass(), PMW);
@@ -2340,20 +2406,26 @@ Error AMDGPUCodeGenPassBuilder::addRegAssignmentFast(
// To Allocate wwm registers used in whole quad mode operations (for shaders).
addMachineFunctionPass(SIPreAllocateWWMRegsPass(), PMW);
- // For allocating other wwm register operands.
- addMachineFunctionPass(RegAllocFastPass({onlyAllocateWWMRegs, "wwm", false}),
- PMW);
+ // WWM allocation - default to fast at -O0.
+ if (WWMRegAllocNPM == RegAllocType::Greedy)
+ addMachineFunctionPass(RAGreedyPass({onlyAllocateWWMRegs, "wwm"}), PMW);
+ else
+ addMachineFunctionPass(
+ RegAllocFastPass({onlyAllocateWWMRegs, "wwm", false}), PMW);
addMachineFunctionPass(SILowerWWMCopiesPass(), PMW);
addMachineFunctionPass(AMDGPUReserveWWMRegsPass(), PMW);
- // For allocating per-thread VGPRs.
- addMachineFunctionPass(RegAllocFastPass({onlyAllocateVGPRs, "vgpr"}), PMW);
+ // VGPR allocation - default to fast at -O0.
+ if (VGPRRegAllocNPM == RegAllocType::Greedy)
+ addMachineFunctionPass(RAGreedyPass({onlyAllocateVGPRs, "vgpr"}), PMW);
+ else
+ addMachineFunctionPass(RegAllocFastPass({onlyAllocateVGPRs, "vgpr"}), PMW);
return Error::success();
}
-void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc(
+Error AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc(
PassManagerWrapper &PMW) const {
if (EnableDCEInRA)
insertPass<DetectDeadLanesPass>(DeadMachineInstructionElimPass());
@@ -2389,7 +2461,7 @@ void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc(
if (TM.getOptLevel() > CodeGenOptLevel::Less)
insertPass<MachineSchedulerPass>(SIFormMemoryClausesPass());
- Base::addOptimizedRegAlloc(PMW);
+ return Base::addOptimizedRegAlloc(PMW);
}
void AMDGPUCodeGenPassBuilder::addPreRegAlloc(PassManagerWrapper &PMW) const {
@@ -2399,11 +2471,17 @@ void AMDGPUCodeGenPassBuilder::addPreRegAlloc(PassManagerWrapper &PMW) const {
Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
PassManagerWrapper &PMW) const {
- // TODO: Check --regalloc-npm option
+ if (auto Err = validateRegAllocOptions())
+ return Err;
addMachineFunctionPass(GCNPreRALongBranchRegPass(), PMW);
- addMachineFunctionPass(RAGreedyPass({onlyAllocateSGPRs, "sgpr"}), PMW);
+ // SGPR allocation - default to greedy at -O1 and above.
+ if (SGPRRegAllocNPM == RegAllocType::Fast)
+ addMachineFunctionPass(RegAllocFastPass({onlyAllocateSGPRs, "sgpr", false}),
+ PMW);
+ else
+ addMachineFunctionPass(RAGreedyPass({onlyAllocateSGPRs, "sgpr"}), PMW);
// Commit allocated register changes. This is mostly necessary because too
// many things rely on the use lists of the physical registers, such as the
@@ -2422,14 +2500,21 @@ Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
// To Allocate wwm registers used in whole quad mode operations (for shaders).
addMachineFunctionPass(SIPreAllocateWWMRegsPass(), PMW);
- // For allocating other wwm register operands.
- addMachineFunctionPass(RAGreedyPass({onlyAllocateWWMRegs, "wwm"}), PMW);
+ // WWM allocation - default to greedy at -O1 and above.
+ if (WWMRegAllocNPM == RegAllocType::Fast)
+ addMachineFunctionPass(
+ RegAllocFastPass({onlyAllocateWWMRegs, "wwm", false}), PMW);
+ else
+ addMachineFunctionPass(RAGreedyPass({onlyAllocateWWMRegs, "wwm"}), PMW);
addMachineFunctionPass(SILowerWWMCopiesPass(), PMW);
addMachineFunctionPass(VirtRegRewriterPass(false), PMW);
addMachineFunctionPass(AMDGPUReserveWWMRegsPass(), PMW);
- // For allocating per-thread VGPRs.
- addMachineFunctionPass(RAGreedyPass({onlyAllocateVGPRs, "vgpr"}), PMW);
+ // VGPR allocation - default to greedy at -O1 and above.
+ if (VGPRRegAllocNPM == RegAllocType::Fast)
+ addMachineFunctionPass(RegAllocFastPass({onlyAllocateVGPRs, "vgpr"}), PMW);
+ else
+ addMachineFunctionPass(RAGreedyPass({onlyAllocateVGPRs, "vgpr"}), PMW);
addPreRewrite(PMW);
addMachineFunctionPass(VirtRegRewriterPass(true), PMW);
diff --git a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
index 07f2d350ffd9c..9823983f0f0bb 100644
--- a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
+++ b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
@@ -2,11 +2,65 @@
# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=PASS
# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER
+# Test default behavior at -O0: uses fast allocator
+# RUN: llc -mtriple=amdgcn -enable-new-pm -O0 -print-pipeline-passes -filetype=null %s 2>&1 | FileCheck %s --check-prefix=DEFAULT-O0
+
+# Test default behavior at -O2: uses greedy allocator
+# RUN: llc -mtriple=amdgcn -enable-new-pm -O2 -print-pipeline-passes -filetype=null %s 2>&1 | FileCheck %s --check-prefix=DEFAULT-O2
+
+# Test AMDGPU-specific NPM regalloc options
+# RUN: llc -mtriple=amdgcn -enable-new-pm -sgpr-regalloc-npm=fast -wwm-regalloc-npm=fast -vgpr-regalloc-npm=fast -print-pipeline-passes -filetype=null %s 2>&1 | FileCheck %s --check-prefix=NPM-FAST
+# RUN: llc -mtriple=amdgcn -enable-new-pm -O3 -sgpr-regalloc-npm=greedy -wwm-regalloc-npm=greedy -vgpr-regalloc-npm=greedy -print-pipeline-passes -filetype=null %s 2>&1 | FileCheck %s --check-prefix=NPM-GREEDY
+# RUN: llc -mtriple=amdgcn -enable-new-pm -O3 -sgpr-regalloc-npm=fast -print-pipeline-passes -filetype=null %s 2>&1 | FileCheck %s --check-prefix=NPM-MIXED
+
+# Test error cases for unsupported allocators
+# RUN: not llc -mtriple=amdgcn -enable-new-pm -sgpr-regalloc-npm=basic -filetype=null %s 2>&1 | FileCheck %s --check-prefix=ERR-BASIC
+# RUN: not llc -mtriple=amdgcn -enable-new-pm -vgpr-regalloc-npm=pbqp -filetype=null %s 2>&1 | FileCheck %s --check-prefix=ERR-PBQP
+
+# Test error when legacy PM options are used with NPM
+# RUN: not llc -mtriple=amdgcn -enable-new-pm -sgpr-regalloc=greedy -filetype=null %s 2>&1 | FileCheck %s --check-prefix=ERR-LEGACY
+
+# Test error when generic --regalloc-npm is used with AMDGPU
+# RUN: not llc -mtriple=amdgcn -enable-new-pm -regalloc-npm=fast -filetype=null %s 2>&1 | FileCheck %s --check-prefix=ERR-GENERIC
+
# PASS: regallocfast<filter=sgpr>
# PASS: regallocfast<filter=wwm>
# PASS: regallocfast<filter=vgpr>
# BAD-FILTER: invalid regallocfast register filter 'bad-filter'
+# At -O0, default uses fast allocator for all register classes.
+# DEFAULT-O0: regallocfast<filter=sgpr
+# DEFAULT-O0: regallocfast<filter=wwm
+# DEFAULT-O0: regallocfast<filter=vgpr
+
+# At -O2, default uses greedy allocator for all register classes.
+# DEFAULT-O2: greedy<sgpr>
+# DEFAULT-O2: greedy<wwm>
+# DEFAULT-O2: greedy<vgpr>
+
+# NPM-FAST: regallocfast<filter=sgpr
+# NPM-FAST: regallocfast<filter=wwm
+# NPM-FAST: regallocfast<filter=vgpr
+
+# NPM-GREEDY: greedy<sgpr>
+# NPM-GREEDY: greedy<wwm>
+# NPM-GREEDY: greedy<vgpr>
+
+# At -O3, default is greedy. With -sgpr-regalloc-npm=fast, SGPR uses fast,
+# but WWM and VGPR still use greedy.
+# NPM-MIXED: regallocfast<filter=sgpr
+# NPM-MIXED: greedy<wwm>
+# NPM-MIXED: greedy<vgpr>
+
+# Error messages for unsupported allocators.
+# ERR-BASIC: unsupported register allocator 'basic' for SGPR registers
+# ERR-PBQP: unsupported register allocator 'pbqp' for VGPR registers
+
+# Error message for legacy PM options with NPM.
+# ERR-LEGACY: -sgpr-regalloc, -vgpr-regalloc, and -wwm-regalloc are legacy PM options
+
+# Error message for generic --regalloc-npm with AMDGPU.
+# ERR-GENERIC: -regalloc-npm not supported for amdgcn
---
name: f
...
More information about the llvm-commits
mailing list