[llvm] a03f82d - [AMDGPU][NPM] Add target-specific register allocation options (#178889)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 3 21:57:52 PST 2026


Author: Teja Alaghari
Date: 2026-02-04T11:27:47+05:30
New Revision: a03f82d7e582078163bce7ecaf960e8a53fb4744

URL: https://github.com/llvm/llvm-project/commit/a03f82d7e582078163bce7ecaf960e8a53fb4744
DIFF: https://github.com/llvm/llvm-project/commit/a03f82d7e582078163bce7ecaf960e8a53fb4744.diff

LOG: [AMDGPU][NPM] Add target-specific register allocation options (#178889)

Add the following AMDGPU-specific options for selecting the register
allocator used for SGPRs, WWM registers, and VGPRs under the new pass
manager (NPM):
- `-sgpr-regalloc-npm`
- `-wwm-regalloc-npm`
- `-vgpr-regalloc-npm`

Added: 
    

Modified: 
    llvm/include/llvm/Passes/CodeGenPassBuilder.h
    llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
    llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 6942fc42ca721..68d13fefd7d8b 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -472,7 +472,7 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
   /// addOptimizedRegAlloc - Add passes related to register allocation.
   /// CodeGenTargetMachineImpl provides standard regalloc passes for most
   /// targets.
-  void addOptimizedRegAlloc(PassManagerWrapper &PMW) const;
+  Error addOptimizedRegAlloc(PassManagerWrapper &PMW) const;
 
   /// Add passes that optimize machine instructions after register allocation.
   void addMachineLateOptimization(PassManagerWrapper &PMW) const;
@@ -505,10 +505,10 @@ template <typename DerivedT, typename TargetMachineT> class CodeGenPassBuilder {
   /// regalloc pass.
   void addRegAllocPass(PassManagerWrapper &PMW, bool Optimized) const;
 
-  /// Add core register alloator passes which do the actual register assignment
-  /// and rewriting. \returns true if any passes were added.
+  /// Add core register allocator passes which do the actual register assignment
+  /// and rewriting.
   Error addRegAssignmentFast(PassManagerWrapper &PMW) const;
-  Error addRegAssignmentOptimized(PassManagerWrapper &PMWM) const;
+  Error addRegAssignmentOptimized(PassManagerWrapper &PMW) const;
 
   /// Allow the target to disable a specific pass by default.
   /// Backend can declare unwanted passes in constructor.
@@ -977,12 +977,9 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::addMachinePasses(
 
   // Run register allocation and passes that are tightly coupled with it,
   // including phi elimination and scheduling.
-  if (*Opt.OptimizeRegAlloc) {
-    derived().addOptimizedRegAlloc(PMW);
-  } else {
-    if (auto Err = derived().addFastRegAlloc(PMW))
-      return Err;
-  }
+  if (auto Err = *Opt.OptimizeRegAlloc ? derived().addOptimizedRegAlloc(PMW)
+                                       : derived().addFastRegAlloc(PMW))
+    return std::move(Err);
 
   // Run post-ra passes.
   derived().addPostRegAlloc(PMW);
@@ -1212,7 +1209,7 @@ Error CodeGenPassBuilder<Derived, TargetMachineT>::addFastRegAlloc(
 /// optimized register allocation, including coalescing, machine instruction
 /// scheduling, and register allocation itself.
 template <typename Derived, typename TargetMachineT>
-void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
+Error CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
     PassManagerWrapper &PMW) const {
   addMachineFunctionPass(DetectDeadLanesPass(), PMW);
 
@@ -1255,10 +1252,8 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
   // PreRA instruction scheduling.
   addMachineFunctionPass(MachineSchedulerPass(&TM), PMW);
 
-  if (auto E = derived().addRegAssignmentOptimized(PMW)) {
-    // addRegAssignmentOptimized did not add a reg alloc pass, so do nothing.
-    return;
-  }
+  if (auto E = derived().addRegAssignmentOptimized(PMW))
+    return std::move(E);
 
   addMachineFunctionPass(StackSlotColoringPass(), PMW);
 
@@ -1274,6 +1269,8 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
   //
   // FIXME: can this move into MachineLateOptimization?
   addMachineFunctionPass(MachineLICMPass(), PMW);
+
+  return Error::success();
 }
 
 //===---------------------------------------------------------------------===//

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7881623bcedd3..49c60c254f6f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -159,10 +159,14 @@ class AMDGPUCodeGenPassBuilder
   Error addRegAssignmentOptimized(PassManagerWrapper &PMW) const;
   void addPreRegAlloc(PassManagerWrapper &PMW) const;
   Error addFastRegAlloc(PassManagerWrapper &PMW) const;
-  void addOptimizedRegAlloc(PassManagerWrapper &PMW) const;
+  Error addOptimizedRegAlloc(PassManagerWrapper &PMW) const;
   void addPreSched2(PassManagerWrapper &PMW) const;
   void addPostBBSections(PassManagerWrapper &PMW) const;
 
+private:
+  Error validateRegAllocOptions() const;
+
+public:
   /// Check if a pass is enabled given \p Opt option. The option always
   /// overrides defaults if explicitly used. Otherwise its default will be used
   /// given that a pass shall work at an optimization \p Level minimum.
@@ -244,6 +248,63 @@ static cl::opt<WWMRegisterRegAlloc::FunctionPassCtor, false,
                 cl::init(&useDefaultRegisterAllocator),
                 cl::desc("Register allocator to use for WWM registers"));
 
+// New pass manager register allocator options for AMDGPU
+static cl::opt<RegAllocType, false, RegAllocTypeParser> SGPRRegAllocNPM(
+    "sgpr-regalloc-npm", cl::Hidden, cl::init(RegAllocType::Default),
+    cl::desc("Register allocator for SGPRs (new pass manager)"));
+
+static cl::opt<RegAllocType, false, RegAllocTypeParser> VGPRRegAllocNPM(
+    "vgpr-regalloc-npm", cl::Hidden, cl::init(RegAllocType::Default),
+    cl::desc("Register allocator for VGPRs (new pass manager)"));
+
+static cl::opt<RegAllocType, false, RegAllocTypeParser> WWMRegAllocNPM(
+    "wwm-regalloc-npm", cl::Hidden, cl::init(RegAllocType::Default),
+    cl::desc("Register allocator for WWM registers (new pass manager)"));
+
+/// Check if the given RegAllocType is supported for AMDGPU NPM register
+/// allocation. Only Fast and Greedy are supported; Basic and PBQP are not.
+static Error checkRegAllocSupported(RegAllocType RAType, StringRef RegName) {
+  if (RAType == RegAllocType::Basic || RAType == RegAllocType::PBQP) {
+    return make_error<StringError>(
+        Twine("unsupported register allocator '") +
+            (RAType == RegAllocType::Basic ? "basic" : "pbqp") + "' for " +
+            RegName + " registers",
+        inconvertibleErrorCode());
+  }
+  return Error::success();
+}
+
+Error AMDGPUCodeGenPassBuilder::validateRegAllocOptions() const {
+  // 1. Generic --regalloc-npm is not supported for AMDGPU.
+  if (Opt.RegAlloc != RegAllocType::Unset) {
+    return make_error<StringError>(
+        "-regalloc-npm not supported for amdgcn. Use -sgpr-regalloc-npm, "
+        "-vgpr-regalloc-npm, and -wwm-regalloc-npm",
+        inconvertibleErrorCode());
+  }
+
+  // 2. Legacy PM regalloc options are not compatible with NPM.
+  if (SGPRRegAlloc.getNumOccurrences() > 0 ||
+      VGPRRegAlloc.getNumOccurrences() > 0 ||
+      WWMRegAlloc.getNumOccurrences() > 0) {
+    return make_error<StringError>(
+        "-sgpr-regalloc, -vgpr-regalloc, and -wwm-regalloc are legacy PM "
+        "options. Use -sgpr-regalloc-npm, -vgpr-regalloc-npm, and "
+        "-wwm-regalloc-npm with the new pass manager",
+        inconvertibleErrorCode());
+  }
+
+  // 3. Only Fast and Greedy allocators are supported for AMDGPU.
+  if (auto Err = checkRegAllocSupported(SGPRRegAllocNPM, "SGPR"))
+    return Err;
+  if (auto Err = checkRegAllocSupported(WWMRegAllocNPM, "WWM"))
+    return Err;
+  if (auto Err = checkRegAllocSupported(VGPRRegAllocNPM, "VGPR"))
+    return Err;
+
+  return Error::success();
+}
+
 static void initializeDefaultSGPRRegisterAllocatorOnce() {
   RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
 
@@ -2327,12 +2388,17 @@ Error AMDGPUCodeGenPassBuilder::addFastRegAlloc(PassManagerWrapper &PMW) const {
 
 Error AMDGPUCodeGenPassBuilder::addRegAssignmentFast(
     PassManagerWrapper &PMW) const {
-  // TODO: handle default regalloc override error (with regalloc-npm)
+  if (auto Err = validateRegAllocOptions())
+    return Err;
 
   addMachineFunctionPass(GCNPreRALongBranchRegPass(), PMW);
 
-  addMachineFunctionPass(RegAllocFastPass({onlyAllocateSGPRs, "sgpr", false}),
-                         PMW);
+  // SGPR allocation - default to fast at -O0.
+  if (SGPRRegAllocNPM == RegAllocType::Greedy)
+    addMachineFunctionPass(RAGreedyPass({onlyAllocateSGPRs, "sgpr"}), PMW);
+  else
+    addMachineFunctionPass(RegAllocFastPass({onlyAllocateSGPRs, "sgpr", false}),
+                           PMW);
 
   // Equivalent of PEI for SGPRs.
   addMachineFunctionPass(SILowerSGPRSpillsPass(), PMW);
@@ -2340,20 +2406,26 @@ Error AMDGPUCodeGenPassBuilder::addRegAssignmentFast(
   // To Allocate wwm registers used in whole quad mode operations (for shaders).
   addMachineFunctionPass(SIPreAllocateWWMRegsPass(), PMW);
 
-  // For allocating other wwm register operands.
-  addMachineFunctionPass(RegAllocFastPass({onlyAllocateWWMRegs, "wwm", false}),
-                         PMW);
+  // WWM allocation - default to fast at -O0.
+  if (WWMRegAllocNPM == RegAllocType::Greedy)
+    addMachineFunctionPass(RAGreedyPass({onlyAllocateWWMRegs, "wwm"}), PMW);
+  else
+    addMachineFunctionPass(
+        RegAllocFastPass({onlyAllocateWWMRegs, "wwm", false}), PMW);
 
   addMachineFunctionPass(SILowerWWMCopiesPass(), PMW);
   addMachineFunctionPass(AMDGPUReserveWWMRegsPass(), PMW);
 
-  // For allocating per-thread VGPRs.
-  addMachineFunctionPass(RegAllocFastPass({onlyAllocateVGPRs, "vgpr"}), PMW);
+  // VGPR allocation - default to fast at -O0.
+  if (VGPRRegAllocNPM == RegAllocType::Greedy)
+    addMachineFunctionPass(RAGreedyPass({onlyAllocateVGPRs, "vgpr"}), PMW);
+  else
+    addMachineFunctionPass(RegAllocFastPass({onlyAllocateVGPRs, "vgpr"}), PMW);
 
   return Error::success();
 }
 
-void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc(
+Error AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc(
     PassManagerWrapper &PMW) const {
   if (EnableDCEInRA)
     insertPass<DetectDeadLanesPass>(DeadMachineInstructionElimPass());
@@ -2389,7 +2461,7 @@ void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc(
   if (TM.getOptLevel() > CodeGenOptLevel::Less)
     insertPass<MachineSchedulerPass>(SIFormMemoryClausesPass());
 
-  Base::addOptimizedRegAlloc(PMW);
+  return Base::addOptimizedRegAlloc(PMW);
 }
 
 void AMDGPUCodeGenPassBuilder::addPreRegAlloc(PassManagerWrapper &PMW) const {
@@ -2399,11 +2471,17 @@ void AMDGPUCodeGenPassBuilder::addPreRegAlloc(PassManagerWrapper &PMW) const {
 
 Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
     PassManagerWrapper &PMW) const {
-  // TODO: Check --regalloc-npm option
+  if (auto Err = validateRegAllocOptions())
+    return Err;
 
   addMachineFunctionPass(GCNPreRALongBranchRegPass(), PMW);
 
-  addMachineFunctionPass(RAGreedyPass({onlyAllocateSGPRs, "sgpr"}), PMW);
+  // SGPR allocation - default to greedy at -O1 and above.
+  if (SGPRRegAllocNPM == RegAllocType::Fast)
+    addMachineFunctionPass(RegAllocFastPass({onlyAllocateSGPRs, "sgpr", false}),
+                           PMW);
+  else
+    addMachineFunctionPass(RAGreedyPass({onlyAllocateSGPRs, "sgpr"}), PMW);
 
   // Commit allocated register changes. This is mostly necessary because too
   // many things rely on the use lists of the physical registers, such as the
@@ -2422,14 +2500,21 @@ Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
   // To Allocate wwm registers used in whole quad mode operations (for shaders).
   addMachineFunctionPass(SIPreAllocateWWMRegsPass(), PMW);
 
-  // For allocating other wwm register operands.
-  addMachineFunctionPass(RAGreedyPass({onlyAllocateWWMRegs, "wwm"}), PMW);
+  // WWM allocation - default to greedy at -O1 and above.
+  if (WWMRegAllocNPM == RegAllocType::Fast)
+    addMachineFunctionPass(
+        RegAllocFastPass({onlyAllocateWWMRegs, "wwm", false}), PMW);
+  else
+    addMachineFunctionPass(RAGreedyPass({onlyAllocateWWMRegs, "wwm"}), PMW);
   addMachineFunctionPass(SILowerWWMCopiesPass(), PMW);
   addMachineFunctionPass(VirtRegRewriterPass(false), PMW);
   addMachineFunctionPass(AMDGPUReserveWWMRegsPass(), PMW);
 
-  // For allocating per-thread VGPRs.
-  addMachineFunctionPass(RAGreedyPass({onlyAllocateVGPRs, "vgpr"}), PMW);
+  // VGPR allocation - default to greedy at -O1 and above.
+  if (VGPRRegAllocNPM == RegAllocType::Fast)
+    addMachineFunctionPass(RegAllocFastPass({onlyAllocateVGPRs, "vgpr"}), PMW);
+  else
+    addMachineFunctionPass(RAGreedyPass({onlyAllocateVGPRs, "vgpr"}), PMW);
 
   addPreRewrite(PMW);
   addMachineFunctionPass(VirtRegRewriterPass(true), PMW);

diff  --git a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
index 07f2d350ffd9c..9823983f0f0bb 100644
--- a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
+++ b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
@@ -2,11 +2,65 @@
 # RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=PASS
 # RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER
 
+# Test default behavior at -O0: uses fast allocator
+# RUN: llc -mtriple=amdgcn -enable-new-pm -O0 -print-pipeline-passes -filetype=null %s 2>&1 | FileCheck %s --check-prefix=DEFAULT-O0
+
+# Test default behavior at -O2: uses greedy allocator
+# RUN: llc -mtriple=amdgcn -enable-new-pm -O2 -print-pipeline-passes -filetype=null %s 2>&1 | FileCheck %s --check-prefix=DEFAULT-O2
+
+# Test AMDGPU-specific NPM regalloc options
+# RUN: llc -mtriple=amdgcn -enable-new-pm -sgpr-regalloc-npm=fast -wwm-regalloc-npm=fast -vgpr-regalloc-npm=fast -print-pipeline-passes -filetype=null %s 2>&1 | FileCheck %s --check-prefix=NPM-FAST
+# RUN: llc -mtriple=amdgcn -enable-new-pm -O3 -sgpr-regalloc-npm=greedy -wwm-regalloc-npm=greedy -vgpr-regalloc-npm=greedy -print-pipeline-passes -filetype=null %s 2>&1 | FileCheck %s --check-prefix=NPM-GREEDY
+# RUN: llc -mtriple=amdgcn -enable-new-pm -O3 -sgpr-regalloc-npm=fast -print-pipeline-passes -filetype=null %s 2>&1 | FileCheck %s --check-prefix=NPM-MIXED
+
+# Test error cases for unsupported allocators
+# RUN: not llc -mtriple=amdgcn -enable-new-pm -sgpr-regalloc-npm=basic -filetype=null %s 2>&1 | FileCheck %s --check-prefix=ERR-BASIC
+# RUN: not llc -mtriple=amdgcn -enable-new-pm -vgpr-regalloc-npm=pbqp -filetype=null %s 2>&1 | FileCheck %s --check-prefix=ERR-PBQP
+
+# Test error when legacy PM options are used with NPM
+# RUN: not llc -mtriple=amdgcn -enable-new-pm -sgpr-regalloc=greedy -filetype=null %s 2>&1 | FileCheck %s --check-prefix=ERR-LEGACY
+
+# Test error when generic --regalloc-npm is used with AMDGPU
+# RUN: not llc -mtriple=amdgcn -enable-new-pm -regalloc-npm=fast -filetype=null %s 2>&1 | FileCheck %s --check-prefix=ERR-GENERIC
+
 # PASS: regallocfast<filter=sgpr>
 # PASS: regallocfast<filter=wwm>
 # PASS: regallocfast<filter=vgpr>
 # BAD-FILTER: invalid regallocfast register filter 'bad-filter'
 
+# At -O0, default uses fast allocator for all register classes.
+# DEFAULT-O0: regallocfast<filter=sgpr
+# DEFAULT-O0: regallocfast<filter=wwm
+# DEFAULT-O0: regallocfast<filter=vgpr
+
+# At -O2, default uses greedy allocator for all register classes.
+# DEFAULT-O2: greedy<sgpr>
+# DEFAULT-O2: greedy<wwm>
+# DEFAULT-O2: greedy<vgpr>
+
+# NPM-FAST: regallocfast<filter=sgpr
+# NPM-FAST: regallocfast<filter=wwm
+# NPM-FAST: regallocfast<filter=vgpr
+
+# NPM-GREEDY: greedy<sgpr>
+# NPM-GREEDY: greedy<wwm>
+# NPM-GREEDY: greedy<vgpr>
+
+# At -O3, default is greedy. With -sgpr-regalloc-npm=fast, SGPR uses fast,
+# but WWM and VGPR still use greedy.
+# NPM-MIXED: regallocfast<filter=sgpr
+# NPM-MIXED: greedy<wwm>
+# NPM-MIXED: greedy<vgpr>
+
+# Error messages for unsupported allocators.
+# ERR-BASIC: unsupported register allocator 'basic' for SGPR registers
+# ERR-PBQP: unsupported register allocator 'pbqp' for VGPR registers
+
+# Error message for legacy PM options with NPM.
+# ERR-LEGACY: -sgpr-regalloc, -vgpr-regalloc, and -wwm-regalloc are legacy PM options
+
+# Error message for generic --regalloc-npm with AMDGPU.
+# ERR-GENERIC: -regalloc-npm not supported for amdgcn
 ---
 name: f
 ...


        


More information about the llvm-commits mailing list