[llvm-branch-commits] [clang] [llvm] [PassBuilder] Add `ThinOrFullLTOPhase` to optimizer pipeline (PR #114577)
Shilei Tian via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Nov 3 18:58:44 PST 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/114577
From a931d1ad84429798fe01ec76dc77cd221f03d2d4 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Fri, 1 Nov 2024 12:39:52 -0400
Subject: [PATCH] [PassBuilder] Add `ThinOrFullLTOPhase` to optimizer pipeline
---
clang/lib/CodeGen/BackendUtil.cpp | 22 +++++++++--------
llvm/include/llvm/Passes/PassBuilder.h | 20 +++++++++++-----
llvm/lib/Passes/PassBuilderPipelines.cpp | 24 +++++++++++--------
.../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 23 +++++++++++-------
.../CodeGen/AMDGPU/print-pipeline-passes.ll | 1 +
llvm/tools/opt/NewPMDriver.cpp | 4 ++--
6 files changed, 57 insertions(+), 37 deletions(-)
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 47a30f00612eb7..70035a5e069a90 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -674,7 +674,7 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts,
// Ensure we lower KCFI operand bundles with -O0.
PB.registerOptimizerLastEPCallback(
- [&](ModulePassManager &MPM, OptimizationLevel Level) {
+ [&](ModulePassManager &MPM, OptimizationLevel Level, ThinOrFullLTOPhase) {
if (Level == OptimizationLevel::O0 &&
LangOpts.Sanitize.has(SanitizerKind::KCFI))
MPM.addPass(createModuleToFunctionPassAdaptor(KCFIPass()));
@@ -693,8 +693,8 @@ static void addKCFIPass(const Triple &TargetTriple, const LangOptions &LangOpts,
static void addSanitizers(const Triple &TargetTriple,
const CodeGenOptions &CodeGenOpts,
const LangOptions &LangOpts, PassBuilder &PB) {
- auto SanitizersCallback = [&](ModulePassManager &MPM,
- OptimizationLevel Level) {
+ auto SanitizersCallback = [&](ModulePassManager &MPM, OptimizationLevel Level,
+ ThinOrFullLTOPhase) {
if (CodeGenOpts.hasSanitizeCoverage()) {
auto SancovOpts = getSancovOptsFromCGOpts(CodeGenOpts);
MPM.addPass(SanitizerCoveragePass(
@@ -778,9 +778,10 @@ static void addSanitizers(const Triple &TargetTriple,
};
if (ClSanitizeOnOptimizerEarlyEP) {
PB.registerOptimizerEarlyEPCallback(
- [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level) {
+ [SanitizersCallback](ModulePassManager &MPM, OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
ModulePassManager NewMPM;
- SanitizersCallback(NewMPM, Level);
+ SanitizersCallback(NewMPM, Level, Phase);
if (!NewMPM.isEmpty()) {
// Sanitizers can abandon<GlobalsAA>.
NewMPM.addPass(RequireAnalysisPass<GlobalsAA, llvm::Module>());
@@ -1058,11 +1059,12 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
// TODO: Consider passing the MemoryProfileOutput to the pass builder via
// the PGOOptions, and set this up there.
if (!CodeGenOpts.MemoryProfileOutput.empty()) {
- PB.registerOptimizerLastEPCallback(
- [](ModulePassManager &MPM, OptimizationLevel Level) {
- MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
- MPM.addPass(ModuleMemProfilerPass());
- });
+ PB.registerOptimizerLastEPCallback([](ModulePassManager &MPM,
+ OptimizationLevel Level,
+ ThinOrFullLTOPhase) {
+ MPM.addPass(createModuleToFunctionPassAdaptor(MemProfilerPass()));
+ MPM.addPass(ModuleMemProfilerPass());
+ });
}
if (CodeGenOpts.FatLTO) {
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 565fd2ab2147e5..e7bc3a58f414f1 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -490,7 +490,8 @@ class PassBuilder {
/// This extension point allows adding optimizations before the function
/// optimization pipeline.
void registerOptimizerEarlyEPCallback(
- const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+ const std::function<void(ModulePassManager &, OptimizationLevel,
+ ThinOrFullLTOPhase Phase)> &C) {
OptimizerEarlyEPCallbacks.push_back(C);
}
@@ -499,7 +500,8 @@ class PassBuilder {
/// This extension point allows adding optimizations at the very end of the
/// function optimization pipeline.
void registerOptimizerLastEPCallback(
- const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+ const std::function<void(ModulePassManager &, OptimizationLevel,
+ ThinOrFullLTOPhase)> &C) {
OptimizerLastEPCallbacks.push_back(C);
}
@@ -630,9 +632,11 @@ class PassBuilder {
void invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
OptimizationLevel Level);
void invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
- OptimizationLevel Level);
+ OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase);
void invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
- OptimizationLevel Level);
+ OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase);
void invokeFullLinkTimeOptimizationEarlyEPCallbacks(ModulePassManager &MPM,
OptimizationLevel Level);
void invokeFullLinkTimeOptimizationLastEPCallbacks(ModulePassManager &MPM,
@@ -756,9 +760,13 @@ class PassBuilder {
SmallVector<std::function<void(FunctionPassManager &, OptimizationLevel)>, 2>
VectorizerStartEPCallbacks;
// Module callbacks
- SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
+ SmallVector<std::function<void(ModulePassManager &, OptimizationLevel,
+ ThinOrFullLTOPhase)>,
+ 2>
OptimizerEarlyEPCallbacks;
- SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
+ SmallVector<std::function<void(ModulePassManager &, OptimizationLevel,
+ ThinOrFullLTOPhase)>,
+ 2>
OptimizerLastEPCallbacks;
SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
FullLinkTimeOptimizationEarlyEPCallbacks;
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 9c90accd9c376b..16fe9a74bb9c0d 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -359,14 +359,16 @@ void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
C(FPM, Level);
}
void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
- OptimizationLevel Level) {
+ OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
for (auto &C : OptimizerEarlyEPCallbacks)
- C(MPM, Level);
+ C(MPM, Level, Phase);
}
void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
- OptimizationLevel Level) {
+ OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
for (auto &C : OptimizerLastEPCallbacks)
- C(MPM, Level);
+ C(MPM, Level, Phase);
}
void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
ModulePassManager &MPM, OptimizationLevel Level) {
@@ -1464,7 +1466,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
if (EnableGlobalAnalyses)
MPM.addPass(RecomputeGlobalsAAPass());
- invokeOptimizerEarlyEPCallbacks(MPM, Level);
+ invokeOptimizerEarlyEPCallbacks(MPM, Level, LTOPhase);
FunctionPassManager OptimizePM;
// Scheduling LoopVersioningLICM when inlining is over, because after that
@@ -1559,7 +1561,7 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
PTO.EagerlyInvalidateAnalyses));
- invokeOptimizerLastEPCallbacks(MPM, Level);
+ invokeOptimizerLastEPCallbacks(MPM, Level, LTOPhase);
// Split out cold code. Splitting is done late to avoid hiding context from
// other optimizations and inadvertently regressing performance. The tradeoff
@@ -1716,8 +1718,10 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
// Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. Actual
// optimization is going to be done in PostLink stage, but clang can't add
// callbacks there in case of in-process ThinLTO called by linker.
- invokeOptimizerEarlyEPCallbacks(MPM, Level);
- invokeOptimizerLastEPCallbacks(MPM, Level);
+ invokeOptimizerEarlyEPCallbacks(MPM, Level,
+ /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
+ invokeOptimizerLastEPCallbacks(MPM, Level,
+ /*Phase=*/ThinOrFullLTOPhase::ThinLTOPreLink);
// Emit annotation remarks.
addAnnotationRemarksPass(MPM);
@@ -2198,7 +2202,7 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
}
- invokeOptimizerEarlyEPCallbacks(MPM, Level);
+ invokeOptimizerEarlyEPCallbacks(MPM, Level, Phase);
if (!VectorizerStartEPCallbacks.empty()) {
FunctionPassManager FPM;
@@ -2216,7 +2220,7 @@ PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
CoroPM.addPass(GlobalDCEPass());
MPM.addPass(CoroConditionalWrapper(std::move(CoroPM)));
- invokeOptimizerLastEPCallbacks(MPM, Level);
+ invokeOptimizerLastEPCallbacks(MPM, Level, Phase);
if (isLTOPreLink(Phase))
addRequiredLTOPreLinkPasses(MPM);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 6e063756045a80..86d8dbe4d803cd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -258,6 +258,11 @@ static WWMRegisterRegAlloc
createGreedyWWMRegisterAllocator);
static WWMRegisterRegAlloc fastRegAllocWWMReg("fast", "fast register allocator",
createFastWWMRegisterAllocator);
+
+static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
+ return Phase == ThinOrFullLTOPhase::FullLTOPreLink ||
+ Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
+}
} // anonymous namespace
static cl::opt<bool>
@@ -755,9 +760,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(AMDGPUUnifyMetadataPass());
// We don't want to run internalization at per-module stage.
- bool LTOPreLink = Phase == ThinOrFullLTOPhase::FullLTOPreLink ||
- Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
- if (InternalizeSymbols && !LTOPreLink) {
+ if (InternalizeSymbols && !isLTOPreLink(Phase)) {
PM.addPass(InternalizePass(mustPreserveGV));
PM.addPass(GlobalDCEPass());
}
@@ -809,12 +812,14 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
// FIXME: Why is AMDGPUAttributor not in CGSCC?
- PB.registerOptimizerLastEPCallback(
- [this](ModulePassManager &MPM, OptimizationLevel Level) {
- if (Level != OptimizationLevel::O0) {
- MPM.addPass(AMDGPUAttributorPass(*this));
- }
- });
+ PB.registerOptimizerLastEPCallback([this](ModulePassManager &MPM,
+ OptimizationLevel Level,
+ ThinOrFullLTOPhase Phase) {
+ if (Level != OptimizationLevel::O0) {
+ if (!isLTOPreLink(Phase))
+ MPM.addPass(AMDGPUAttributorPass(*this));
+ }
+ });
PB.registerFullLinkTimeOptimizationLastEPCallback(
[this](ModulePassManager &PM, OptimizationLevel Level) {
diff --git a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
index 13e38f1bdd3330..c68143f44866f3 100644
--- a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
+++ b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
@@ -13,6 +13,7 @@
; O0-NOT: amdgpu-attributor
; PRE-NOT: internalize
+; PRE-NOT: amdgpu-attributor
define amdgpu_kernel void @kernel() {
entry:
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index 3f1092433d9f31..ea300fb3dbeb13 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -300,13 +300,13 @@ static void registerEPCallbacks(PassBuilder &PB) {
});
if (tryParsePipelineText<ModulePassManager>(PB, OptimizerEarlyEPPipeline))
PB.registerOptimizerEarlyEPCallback(
- [&PB](ModulePassManager &PM, OptimizationLevel) {
+ [&PB](ModulePassManager &PM, OptimizationLevel, ThinOrFullLTOPhase) {
ExitOnError Err("Unable to parse OptimizerEarlyEP pipeline: ");
Err(PB.parsePassPipeline(PM, OptimizerEarlyEPPipeline));
});
if (tryParsePipelineText<ModulePassManager>(PB, OptimizerLastEPPipeline))
PB.registerOptimizerLastEPCallback(
- [&PB](ModulePassManager &PM, OptimizationLevel) {
+ [&PB](ModulePassManager &PM, OptimizationLevel, ThinOrFullLTOPhase) {
ExitOnError Err("Unable to parse OptimizerLastEP pipeline: ");
Err(PB.parsePassPipeline(PM, OptimizerLastEPPipeline));
});
More information about the llvm-branch-commits mailing list