[llvm-branch-commits] [llvm] [RegAlloc][NewPM] Plug Greedy RA in codegen pipeline (PR #120557)
Akshat Oke via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Dec 19 06:50:24 PST 2024
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/120557
>From f93228ae0597667a7be51dd062f4d31ff2d7f4cf Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 11 Dec 2024 10:57:21 +0000
Subject: [PATCH 1/4] [RegAlloc][NewPM] Plug Greedy RA in codegen pipeline
---
llvm/include/llvm/Passes/CodeGenPassBuilder.h | 18 +++++++++++++++-
.../llvm/Passes/MachinePassRegistry.def | 4 ++--
.../include/llvm/Target/CGPassBuilderOption.h | 2 +-
llvm/lib/Passes/PassBuilder.cpp | 13 ++++++++++++
...plicit-def-remat-requires-impdef-check.mir | 1 +
...implicit-def-with-impdef-greedy-assert.mir | 1 +
llvm/test/CodeGen/AArch64/pr51516.mir | 1 +
llvm/test/CodeGen/AArch64/spill-fold.mir | 2 ++
.../extend-phi-subrange-not-in-parent.mir | 1 +
llvm/test/CodeGen/MIR/Generic/runPass.mir | 1 +
.../SystemZ/clear-liverange-spillreg.mir | 1 +
llvm/test/CodeGen/Thumb/high-reg-clobber.mir | 1 +
llvm/test/CodeGen/X86/limit-split-cost.mir | 1 +
.../test/tools/llc/new-pm/regalloc-amdgpu.mir | 17 +++++++++------
llvm/tools/llc/NewPMDriver.cpp | 21 +++++++++++++++----
15 files changed, 71 insertions(+), 14 deletions(-)
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 28768a72c83fa3..4135abdad90ff6 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1059,7 +1059,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineSSAOptimization(
///
/// A target that uses the standard regalloc pass order for fast or optimized
/// allocation may still override this for per-target regalloc
-/// selection. But -regalloc=... always takes precedence.
+/// selection. But -regalloc-npm=... always takes precedence.
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
AddMachinePass &addPass, bool Optimized) const {
@@ -1076,6 +1076,22 @@ template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass(
AddMachinePass &addPass, bool Optimized) const {
// TODO: Parse Opt.RegAlloc to add register allocator.
+ // Use the specified -regalloc-npm={basic|greedy|fast|pbqp}
+ if (Opt.RegAlloc > RegAllocType::Default) {
+ switch (Opt.RegAlloc) {
+ case RegAllocType::Fast:
+ addPass(RegAllocFastPass());
+ break;
+ case RegAllocType::Greedy:
+ addPass(RAGreedyPass());
+ break;
+ default:
+ llvm_unreachable("Register allocator not supported yet.");
+ }
+ return;
+ }
+ // -regalloc=default or unspecified, so pick based on the optimization level.
+ derived().addTargetRegisterAllocator(addPass, Optimized);
}
template <typename Derived, typename TargetMachineT>
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 5facdfa825e4cb..abcc248334ed5f 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -188,12 +188,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
},
"filter=reg-filter;no-clear-vregs")
+// 'all' is the default filter
MACHINE_FUNCTION_PASS_WITH_PARAMS(
"regallocgreedy", "RAGreedyPass",
[](RAGreedyPass::Options Opts) { return RAGreedyPass(Opts); },
[PB = this](StringRef Params) {
- // TODO: parseRegAllocGreedyFilterFunc(*PB, Params);
- return Expected<RAGreedyPass::Options>(RAGreedyPass::Options{});
+ return parseRegAllocGreedyFilterFunc(*PB, Params);
}, "reg-filter"
)
#undef MACHINE_FUNCTION_PASS_WITH_PARAMS
diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h
index 29bdb9c1746d3c..05c84c1bdec851 100644
--- a/llvm/include/llvm/Target/CGPassBuilderOption.h
+++ b/llvm/include/llvm/Target/CGPassBuilderOption.h
@@ -53,7 +53,7 @@ struct CGPassBuilderOption {
bool RequiresCodeGenSCCOrder = false;
RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault;
- StringRef RegAlloc = "default";
+ RegAllocType RegAlloc = RegAllocType::Default;
std::optional<GlobalISelAbortMode> EnableGlobalISelAbort;
std::string FSProfileFile;
std::string FSRemappingFile;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index f64b6e0adb2b32..4c0550316835ff 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1315,6 +1315,19 @@ parseBoundsCheckingOptions(StringRef Params) {
return Mode;
}
+Expected<RAGreedyPass::Options> parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) {
+ if (Params.empty() || Params == "all") {
+ return RAGreedyPass::Options();
+ }
+ std::optional<RegAllocFilterFunc> Filter = PB.parseRegAllocFilter(Params);
+ if (!Filter) {
+ return make_error<StringError>(
+ formatv("invalid regallocgreedy register filter '{0}' ", Params).str(),
+ inconvertibleErrorCode());
+ }
+ return RAGreedyPass::Options{*Filter, Params};
+}
+
} // namespace
/// Tests whether a pass name starts with a valid prefix for a default pipeline
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
index 47aa34e3c01156..a168c2891c7d6f 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-remat-requires-impdef-check.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=arm64-apple-macosx -mcpu=apple-m1 -stress-regalloc=4 -verify-regalloc -passes=regallocgreedy -o - %s | FileCheck %s
--- |
define void @inst_stores_to_dead_spill_implicit_def_impdef() {
diff --git a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
index a5d74ef75f0a0a..d9edda47638a3f 100644
--- a/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
+++ b/llvm/test/CodeGen/AArch64/implicit-def-with-impdef-greedy-assert.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc -mtriple=arm64-apple-ios -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=arm64-apple-ios -passes=regallocgreedy -o - %s | FileCheck %s
---
name: widget
diff --git a/llvm/test/CodeGen/AArch64/pr51516.mir b/llvm/test/CodeGen/AArch64/pr51516.mir
index 910bfb858b50f6..e84f0ca2015ce5 100644
--- a/llvm/test/CodeGen/AArch64/pr51516.mir
+++ b/llvm/test/CodeGen/AArch64/pr51516.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=aarch64-unknown-fuchsia -run-pass=greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-unknown-fuchsia -passes=regallocgreedy -verify-machineinstrs -o - %s | FileCheck %s
# Check that we spill %31 and do not rematerialize it since the use operand
# of ADDXri is killed by the STRXui in this block.
diff --git a/llvm/test/CodeGen/AArch64/spill-fold.mir b/llvm/test/CodeGen/AArch64/spill-fold.mir
index b1e7ebe3a7e82b..2773b5f19618a9 100644
--- a/llvm/test/CodeGen/AArch64/spill-fold.mir
+++ b/llvm/test/CodeGen/AArch64/spill-fold.mir
@@ -1,5 +1,7 @@
# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
# RUN: llc -mtriple=aarch64_be-none-linux-gnu -run-pass greedy -verify-machineinstrs -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64-none-linux-gnu -passes=regallocgreedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=aarch64_be-none-linux-gnu -passes=regallocgreedy -o - %s | FileCheck %s
--- |
define i64 @test_subreg_spill_fold() { ret i64 0 }
define i64 @test_subreg_spill_fold2() { ret i64 0 }
diff --git a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
index 760ae6032230f5..42bba4d1504013 100644
--- a/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
+++ b/llvm/test/CodeGen/AMDGPU/extend-phi-subrange-not-in-parent.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -passes=regallocgreedy -o - %s | FileCheck %s
# Initially %2 starts out with 2 subranges (one for sub0, and one for
# the rest of the lanes). After %2 is split, after refineSubRanges the
diff --git a/llvm/test/CodeGen/MIR/Generic/runPass.mir b/llvm/test/CodeGen/MIR/Generic/runPass.mir
index 75763c5389b09e..41dd98ff909b0c 100644
--- a/llvm/test/CodeGen/MIR/Generic/runPass.mir
+++ b/llvm/test/CodeGen/MIR/Generic/runPass.mir
@@ -2,6 +2,7 @@
# RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -run-pass=regallocfast -debug-pass=Arguments -o - %s | FileCheck %s
# RUN: llc -passes=regallocfast -o - %s | FileCheck %s
+# RUN: llc -passes=regallocgreedy -o - %s | FileCheck %s
# Check that passes are initialized correctly, so that it's possible to
# use -run-pass.
diff --git a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
index 197c3d8551fc38..de0db97f14bf3c 100644
--- a/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
+++ b/llvm/test/CodeGen/SystemZ/clear-liverange-spillreg.mir
@@ -1,4 +1,5 @@
#RUN: llc -o - %s -mtriple=s390x-ibm-linux -run-pass=greedy
+#RUN: llc -o - %s -mtriple=s390x-ibm-linux -passes=regallocgreedy
#PR34502. Check HoistSpill works properly after the live range of spilled
#virtual register is cleared.
--- |
diff --git a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
index 1402c7c2cbca36..e085e38ae5fe31 100644
--- a/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
+++ b/llvm/test/CodeGen/Thumb/high-reg-clobber.mir
@@ -3,6 +3,7 @@
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass greedy %s -o - | FileCheck %s
# RUN: llc -mtriple thumbv6m-arm-none-eabi -run-pass regallocfast %s -o - | FileCheck %s --check-prefix=FAST
# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocfast %s -o - | FileCheck %s --check-prefix=FAST
+# RUN: llc -mtriple thumbv6m-arm-none-eabi -passes=regallocgreedy %s -o - | FileCheck %s
...
---
diff --git a/llvm/test/CodeGen/X86/limit-split-cost.mir b/llvm/test/CodeGen/X86/limit-split-cost.mir
index 7ec0404e0f737c..eec8a3939151a6 100644
--- a/llvm/test/CodeGen/X86/limit-split-cost.mir
+++ b/llvm/test/CodeGen/X86/limit-split-cost.mir
@@ -1,5 +1,6 @@
# REQUIRES: asserts
# RUN: llc -mtriple=x86_64-- -run-pass=greedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
+# RUN: llc -mtriple=x86_64-- -passes=regallocgreedy %s -debug-only=regalloc -huge-size-for-split=0 -o /dev/null 2>&1 | FileCheck %s
# Check no global region split is needed because the live range to split is trivially rematerializable.
# CHECK-NOT: Compact region bundles
--- |
diff --git a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
index 07f2d350ffd9c0..66c9d8942f3da4 100644
--- a/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
+++ b/llvm/test/tools/llc/new-pm/regalloc-amdgpu.mir
@@ -1,12 +1,17 @@
# REQUIRES: amdgpu-registered-target
-# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=PASS
-# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=BAD-FILTER
+# RUN: llc -mtriple=amdgcn --passes='regallocfast<filter=sgpr>,regallocfast<filter=wwm>,regallocfast<filter=vgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=RAFAST
+# RUN: not llc -mtriple=amdgcn --passes='regallocfast<filter=bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=RAFAST-BAD-FILTER
-# PASS: regallocfast<filter=sgpr>
-# PASS: regallocfast<filter=wwm>
-# PASS: regallocfast<filter=vgpr>
-# BAD-FILTER: invalid regallocfast register filter 'bad-filter'
+# RUN: llc -mtriple=amdgcn -passes='regallocgreedy<sgpr>' --print-pipeline-passes --filetype=null %s | FileCheck %s --check-prefix=RAGREEDY
+# RUN: not llc -mtriple=amdgcn -passes='regallocgreedy<bad-filter>' --print-pipeline-passes --filetype=null %s 2>&1 | FileCheck %s --check-prefix=RAGREEDY-BAD-FILTER
+# RAFAST: regallocfast<filter=sgpr>
+# RAFAST: regallocfast<filter=wwm>
+# RAFAST: regallocfast<filter=vgpr>
+# RAFAST-BAD-FILTER: invalid regallocfast register filter 'bad-filter'
+
+# RAGREEDY: regallocgreedy<sgpr>
+# RAGREEDY-BAD-FILTER: invalid regallocgreedy register filter 'bad-filter'
---
name: f
...
diff --git a/llvm/tools/llc/NewPMDriver.cpp b/llvm/tools/llc/NewPMDriver.cpp
index 3892fbb8c74f78..06b7fa7ac351b0 100644
--- a/llvm/tools/llc/NewPMDriver.cpp
+++ b/llvm/tools/llc/NewPMDriver.cpp
@@ -48,10 +48,23 @@
using namespace llvm;
-static cl::opt<std::string>
- RegAlloc("regalloc-npm",
- cl::desc("Register allocator to use for new pass manager"),
- cl::Hidden, cl::init("default"));
+// static cl::opt<std::string>
+// RegAlloc("regalloc-npm",
+// cl::desc("Register allocator to use for new pass manager"),
+// cl::Hidden, cl::init("default"));
+
+// create option for RegAllocType enum
+static cl::opt<RegAllocType> RegAlloc(
+ "regalloc-npm", cl::desc("Register allocator to use for new pass manager"),
+ cl::Hidden, cl::init(RegAllocType::Default),
+ cl::values(
+ clEnumValN(RegAllocType::Default, "default",
+ "Default register allocator"),
+ clEnumValN(RegAllocType::PBQP, "pbqp", "PBQP register allocator"),
+ clEnumValN(RegAllocType::Fast, "fast", "Fast register allocator"),
+ clEnumValN(RegAllocType::Basic, "basic", "Basic register allocator"),
+ clEnumValN(RegAllocType::Greedy, "greedy",
+ "Greedy register allocator")));
static cl::opt<bool>
DebugPM("debug-pass-manager", cl::Hidden,
>From 20eede6d14e5af2c536d133278d42b5cd01820c9 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Thu, 19 Dec 2024 11:00:46 +0000
Subject: [PATCH 2/4] Error out on AMDGPU for regalloc-npm flag
---
llvm/include/llvm/Passes/CodeGenPassBuilder.h | 29 ++++++++++---------
.../include/llvm/Target/CGPassBuilderOption.h | 4 +--
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 23 +++++++++++++++
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 2 ++
.../CodeGen/AMDGPU/sgpr-regalloc-flags.ll | 3 ++
llvm/tools/llc/NewPMDriver.cpp | 2 +-
6 files changed, 47 insertions(+), 16 deletions(-)
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 4135abdad90ff6..31fb8af7576c7b 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1086,7 +1086,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass(
addPass(RAGreedyPass());
break;
default:
- llvm_unreachable("Register allocator not supported yet.");
+ report_fatal_error("Register allocator not supported yet.", false);
}
return;
}
@@ -1162,20 +1162,23 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addOptimizedRegAlloc(
// PreRA instruction scheduling.
addPass(MachineSchedulerPass());
- if (derived().addRegAssignmentOptimized(addPass)) {
- // Allow targets to expand pseudo instructions depending on the choice of
- // registers before MachineCopyPropagation.
- derived().addPostRewrite(addPass);
+ if (auto E = derived().addRegAssignmentOptimized(addPass)) {
+ // addRegAssignmentOptimized did not add a reg alloc pass, so do nothing.
+ // FIXME: This is not really an error.
+ return;
+ }
+ // Allow targets to expand pseudo instructions depending on the choice of
+ // registers before MachineCopyPropagation.
+ derived().addPostRewrite(addPass);
- // Copy propagate to forward register uses and try to eliminate COPYs that
- // were not coalesced.
- addPass(MachineCopyPropagationPass());
+ // Copy propagate to forward register uses and try to eliminate COPYs that
+ // were not coalesced.
+ addPass(MachineCopyPropagationPass());
- // Run post-ra machine LICM to hoist reloads / remats.
- //
- // FIXME: can this move into MachineLateOptimization?
- addPass(MachineLICMPass());
- }
+ // Run post-ra machine LICM to hoist reloads / remats.
+ //
+ // FIXME: can this move into MachineLateOptimization?
+ addPass(MachineLICMPass());
}
//===---------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h
index 05c84c1bdec851..f146cc7b8028d8 100644
--- a/llvm/include/llvm/Target/CGPassBuilderOption.h
+++ b/llvm/include/llvm/Target/CGPassBuilderOption.h
@@ -20,7 +20,7 @@
namespace llvm {
enum class RunOutliner { TargetDefault, AlwaysOutline, NeverOutline };
-enum class RegAllocType { Default, Basic, Fast, Greedy, PBQP };
+enum class RegAllocType { Unset, Default, Basic, Fast, Greedy, PBQP };
// Not one-on-one but mostly corresponding to commandline options in
// TargetPassConfig.cpp.
@@ -53,7 +53,7 @@ struct CGPassBuilderOption {
bool RequiresCodeGenSCCOrder = false;
RunOutliner EnableMachineOutliner = RunOutliner::TargetDefault;
- RegAllocType RegAlloc = RegAllocType::Default;
+ RegAllocType RegAlloc = RegAllocType::Unset;
std::optional<GlobalISelAbortMode> EnableGlobalISelAbort;
std::string FSProfileFile;
std::string FSRemappingFile;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 7256eec89008a5..952bf479827f48 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -68,6 +68,7 @@
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Target/CGPassBuilderOption.h"
#include "llvm/Transforms/HipStdPar/HipStdPar.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
@@ -2099,6 +2100,28 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
addPass(SIShrinkInstructionsPass());
}
+static const char RegAllocNPMNotSupportedMessage[] =
+ "-regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, "
+ "-wwm-regalloc-npm, and -vgpr-regalloc-npm";
+
+Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized(
+ AddMachinePass &addPass) const {
+ if (Opt.RegAlloc != RegAllocType::Unset)
+ report_fatal_error(RegAllocNPMNotSupportedMessage, false);
+
+ return make_error<StringError>("not implemented yet",
+ inconvertibleErrorCode());
+}
+
+Error AMDGPUCodeGenPassBuilder::addRegAssignmentFast(
+ AddMachinePass &addPass) const {
+ if (Opt.RegAlloc != RegAllocType::Unset)
+ report_fatal_error(RegAllocNPMNotSupportedMessage, false);
+
+ return make_error<StringError>("not implemented yet",
+ inconvertibleErrorCode());
+}
+
bool AMDGPUCodeGenPassBuilder::isPassEnabled(const cl::opt<bool> &Opt,
CodeGenOptLevel Level) const {
if (Opt.getNumOccurrences())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 5ba58a92621edb..197476a0f80574 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -176,6 +176,8 @@ class AMDGPUCodeGenPassBuilder
void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
Error addInstSelector(AddMachinePass &) const;
void addMachineSSAOptimization(AddMachinePass &) const;
+ Error addRegAssignmentOptimized(AddMachinePass &) const;
+ Error addRegAssignmentFast(AddMachinePass &) const;
/// Check if a pass is enabled given \p Opt option. The option always
/// overrides defaults if explicitly used. Otherwise its default will be used
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
index 52ad7e5355207d..a54fee3a0f964c 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
@@ -12,8 +12,11 @@
; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=basic -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
; RUN: not --crash llc -verify-machineinstrs=0 -regalloc=fast -O0 -mtriple=amdgcn-amd-amdhsa -debug-pass=Structure -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC %s
+; RUN: not llc -enable-new-pm -verify-machineinstrs=0 -regalloc-npm=fast -O0 -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC-NPM %s
+; RUN: not llc -enable-new-pm -verify-machineinstrs=0 -regalloc-npm=basic -O3 -mtriple=amdgcn-amd-amdhsa -o /dev/null %s 2>&1 | FileCheck -check-prefix=REGALLOC-NPM %s
; REGALLOC: -regalloc not supported with amdgcn. Use -sgpr-regalloc, -wwm-regalloc, and -vgpr-regalloc
+; REGALLOC-NPM: -regalloc-npm not supported with amdgcn. Use -sgpr-regalloc-npm, -wwm-regalloc-npm, and -vgpr-regalloc-npm
; DEFAULT: Greedy Register Allocator
; DEFAULT-NEXT: Virtual Register Rewriter
diff --git a/llvm/tools/llc/NewPMDriver.cpp b/llvm/tools/llc/NewPMDriver.cpp
index 06b7fa7ac351b0..de15b3717da699 100644
--- a/llvm/tools/llc/NewPMDriver.cpp
+++ b/llvm/tools/llc/NewPMDriver.cpp
@@ -56,7 +56,7 @@ using namespace llvm;
// create option for RegAllocType enum
static cl::opt<RegAllocType> RegAlloc(
"regalloc-npm", cl::desc("Register allocator to use for new pass manager"),
- cl::Hidden, cl::init(RegAllocType::Default),
+ cl::Hidden, cl::init(RegAllocType::Unset),
cl::values(
clEnumValN(RegAllocType::Default, "default",
"Default register allocator"),
>From 6345993900b3d5f5dbc8b845249284e1d560b00c Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Thu, 19 Dec 2024 14:24:53 +0000
Subject: [PATCH 3/4] Remove the TODO as it's implemented
---
llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 -
llvm/tools/llc/NewPMDriver.cpp | 6 ------
2 files changed, 7 deletions(-)
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 31fb8af7576c7b..d74939d9efb8f4 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1075,7 +1075,6 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addTargetRegisterAllocator(
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass(
AddMachinePass &addPass, bool Optimized) const {
- // TODO: Parse Opt.RegAlloc to add register allocator.
// Use the specified -regalloc-npm={basic|greedy|fast|pbqp}
if (Opt.RegAlloc > RegAllocType::Default) {
switch (Opt.RegAlloc) {
diff --git a/llvm/tools/llc/NewPMDriver.cpp b/llvm/tools/llc/NewPMDriver.cpp
index de15b3717da699..0f7aa6284962a2 100644
--- a/llvm/tools/llc/NewPMDriver.cpp
+++ b/llvm/tools/llc/NewPMDriver.cpp
@@ -48,12 +48,6 @@
using namespace llvm;
-// static cl::opt<std::string>
-// RegAlloc("regalloc-npm",
-// cl::desc("Register allocator to use for new pass manager"),
-// cl::Hidden, cl::init("default"));
-
-// create option for RegAllocType enum
static cl::opt<RegAllocType> RegAlloc(
"regalloc-npm", cl::desc("Register allocator to use for new pass manager"),
cl::Hidden, cl::init(RegAllocType::Unset),
>From 5c2a6b27649352411696b5e21cce2192b2e5f949 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Thu, 19 Dec 2024 14:49:58 +0000
Subject: [PATCH 4/4] clang format files
---
llvm/include/llvm/Passes/CodeGenPassBuilder.h | 16 ++++++++--------
llvm/lib/Passes/PassBuilder.cpp | 3 ++-
2 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index d74939d9efb8f4..2dbcd7122f6aa8 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1078,14 +1078,14 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addRegAllocPass(
// Use the specified -regalloc-npm={basic|greedy|fast|pbqp}
if (Opt.RegAlloc > RegAllocType::Default) {
switch (Opt.RegAlloc) {
- case RegAllocType::Fast:
- addPass(RegAllocFastPass());
- break;
- case RegAllocType::Greedy:
- addPass(RAGreedyPass());
- break;
- default:
- report_fatal_error("Register allocator not supported yet.", false);
+ case RegAllocType::Fast:
+ addPass(RegAllocFastPass());
+ break;
+ case RegAllocType::Greedy:
+ addPass(RAGreedyPass());
+ break;
+ default:
+ report_fatal_error("Register allocator not supported yet.", false);
}
return;
}
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 4c0550316835ff..f9a4af486631eb 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1315,7 +1315,8 @@ parseBoundsCheckingOptions(StringRef Params) {
return Mode;
}
-Expected<RAGreedyPass::Options> parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) {
+Expected<RAGreedyPass::Options>
+parseRegAllocGreedyFilterFunc(PassBuilder &PB, StringRef Params) {
if (Params.empty() || Params == "all") {
return RAGreedyPass::Options();
}
More information about the llvm-branch-commits
mailing list