[clang] [llvm] [PassBuilder] Add `LTOPreLink` to early simplification EP call backs (PR #114547)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Nov 1 08:12:58 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
@llvm/pr-subscribers-backend-amdgpu
Author: Shilei Tian (shiltian)
<details>
<summary>Changes</summary>
The early simplification pipeline is used both in non-LTO mode and in the
(Thin/Full)LTO pre-link stage. There are some passes that we want to run in
non-LTO mode but not at the LTO pre-link stage. There is currently no way to
control this; this PR adds that support. To demonstrate its use, we enable the
internalization pass only in non-LTO mode for AMDGPU, because running it in the
pre-link stage causes some issues.
---
Full diff: https://github.com/llvm/llvm-project/pull/114547.diff
7 Files Affected:
- (modified) clang/lib/CodeGen/BackendUtil.cpp (+1-1)
- (modified) llvm/include/llvm/Passes/PassBuilder.h (+6-3)
- (modified) llvm/lib/Passes/PassBuilderPipelines.cpp (+7-4)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+12-4)
- (modified) llvm/lib/Target/BPF/BPFTargetMachine.cpp (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll (+8)
- (modified) llvm/tools/opt/NewPMDriver.cpp (+1-1)
``````````diff
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index ae33554a66b6b5..76cdf64b3c5a80 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -993,7 +993,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
createModuleToFunctionPassAdaptor(ObjCARCExpandPass()));
});
PB.registerPipelineEarlySimplificationEPCallback(
- [](ModulePassManager &MPM, OptimizationLevel Level) {
+ [](ModulePassManager &MPM, OptimizationLevel Level, bool) {
if (Level != OptimizationLevel::O0)
MPM.addPass(ObjCARCAPElimPass());
});
diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index e6ced0cccb9b3c..530db62bce0efa 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -478,7 +478,8 @@ class PassBuilder {
/// This extension point allows adding optimization right after passes that do
/// basic simplification of the input IR.
void registerPipelineEarlySimplificationEPCallback(
- const std::function<void(ModulePassManager &, OptimizationLevel)> &C) {
+ const std::function<void(ModulePassManager &, OptimizationLevel, bool)>
+ &C) {
PipelineEarlySimplificationEPCallbacks.push_back(C);
}
@@ -637,7 +638,8 @@ class PassBuilder {
void invokePipelineStartEPCallbacks(ModulePassManager &MPM,
OptimizationLevel Level);
void invokePipelineEarlySimplificationEPCallbacks(ModulePassManager &MPM,
- OptimizationLevel Level);
+ OptimizationLevel Level,
+ bool LTOPreLink = false);
static bool checkParametrizedPassName(StringRef Name, StringRef PassName) {
if (!Name.consume_front(PassName))
@@ -762,7 +764,8 @@ class PassBuilder {
FullLinkTimeOptimizationLastEPCallbacks;
SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
PipelineStartEPCallbacks;
- SmallVector<std::function<void(ModulePassManager &, OptimizationLevel)>, 2>
+ SmallVector<std::function<void(ModulePassManager &, OptimizationLevel, bool)>,
+ 2>
PipelineEarlySimplificationEPCallbacks;
SmallVector<std::function<void(ModuleAnalysisManager &)>, 2>
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 3f28dd39911f79..8c76e0c4955959 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -384,9 +384,9 @@ void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
C(MPM, Level);
}
void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
- ModulePassManager &MPM, OptimizationLevel Level) {
+ ModulePassManager &MPM, OptimizationLevel Level, bool LTOPreLink) {
for (auto &C : PipelineEarlySimplificationEPCallbacks)
- C(MPM, Level);
+ C(MPM, Level, LTOPreLink);
}
// Helper to add AnnotationRemarksPass.
@@ -1140,7 +1140,10 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
MPM.addPass(LowerTypeTestsPass(nullptr, nullptr,
lowertypetests::DropTestKind::Assume));
- invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
+ bool LTOPreLink = (Phase == ThinOrFullLTOPhase::FullLTOPreLink) ||
+ (Phase == ThinOrFullLTOPhase::ThinLTOPreLink);
+
+ invokePipelineEarlySimplificationEPCallbacks(MPM, Level, LTOPreLink);
// Interprocedural constant propagation now that basic cleanup has occurred
// and prior to optimizing globals.
@@ -2155,7 +2158,7 @@ ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
if (PGOOpt && PGOOpt->DebugInfoForProfiling)
MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
- invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
+ invokePipelineEarlySimplificationEPCallbacks(MPM, Level, LTOPreLink);
// Build a minimal pipeline based on the semantics required by LLVM,
// which is just that always inlining occurs. Further, disable generating
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index d93ec34a703d3d..1d295804fe3d2a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -747,7 +747,7 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
});
PB.registerPipelineEarlySimplificationEPCallback(
- [](ModulePassManager &PM, OptimizationLevel Level) {
+ [](ModulePassManager &PM, OptimizationLevel Level, bool LTOPreLink) {
PM.addPass(AMDGPUPrintfRuntimeBindingPass());
if (Level == OptimizationLevel::O0)
@@ -755,7 +755,8 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(AMDGPUUnifyMetadataPass());
- if (InternalizeSymbols) {
+ // We don't want to run internalization at per-module stage.
+ if (InternalizeSymbols && !LTOPreLink) {
PM.addPass(InternalizePass(mustPreserveGV));
PM.addPass(GlobalDCEPass());
}
@@ -823,8 +824,15 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PM.addPass(AMDGPUSwLowerLDSPass(*this));
if (EnableLowerModuleLDS)
PM.addPass(AMDGPULowerModuleLDSPass(*this));
- if (EnableAMDGPUAttributor && Level != OptimizationLevel::O0)
- PM.addPass(AMDGPUAttributorPass(*this));
+ if (Level != OptimizationLevel::O0) {
+ if (EnableAMDGPUAttributor)
+ PM.addPass(AMDGPUAttributorPass(*this));
+ // Do we really need internalization in LTO?
+ if (InternalizeSymbols) {
+ PM.addPass(InternalizePass(mustPreserveGV));
+ PM.addPass(GlobalDCEPass());
+ }
+ }
});
PB.registerRegClassFilterParsingCallback(
diff --git a/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 7d91fa8bb824cf..6775930c8fb2dd 100644
--- a/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -138,7 +138,7 @@ void BPFTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
FPM.addPass(BPFPreserveStaticOffsetPass(false));
});
PB.registerPipelineEarlySimplificationEPCallback(
- [=](ModulePassManager &MPM, OptimizationLevel) {
+ [=](ModulePassManager &MPM, OptimizationLevel, bool) {
MPM.addPass(BPFAdjustOptPass());
});
}
diff --git a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
index b9eda0c1cd3bb6..792e83095efaa7 100644
--- a/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
+++ b/llvm/test/CodeGen/AMDGPU/print-pipeline-passes.ll
@@ -3,9 +3,17 @@
; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto<O2>" -print-pipeline-passes %s -o - | FileCheck %s
; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto<O3>" -print-pipeline-passes %s -o - | FileCheck %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto-pre-link<O0>" -print-pipeline-passes %s -o - | FileCheck --check-prefix=PRE %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto-pre-link<O1>" -print-pipeline-passes %s -o - | FileCheck --check-prefix=PRE %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto-pre-link<O2>" -print-pipeline-passes %s -o - | FileCheck --check-prefix=PRE %s
+; RUN: opt -mtriple=amdgcn--amdhsa -S -passes="lto-pre-link<O3>" -print-pipeline-passes %s -o - | FileCheck --check-prefix=PRE %s
+
+
; CHECK: amdgpu-attributor
; O0-NOT: amdgpu-attributor
+; PRE-NOT: internalize
+
define amdgpu_kernel void @kernel() {
entry:
ret void
diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index 9a477193a29365..ccf271356b64c5 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -294,7 +294,7 @@ static void registerEPCallbacks(PassBuilder &PB) {
if (tryParsePipelineText<ModulePassManager>(
PB, PipelineEarlySimplificationEPPipeline))
PB.registerPipelineEarlySimplificationEPCallback(
- [&PB](ModulePassManager &PM, OptimizationLevel) {
+ [&PB](ModulePassManager &PM, OptimizationLevel, bool) {
ExitOnError Err("Unable to parse EarlySimplification pipeline: ");
Err(PB.parsePassPipeline(PM, PipelineEarlySimplificationEPPipeline));
});
``````````
</details>
https://github.com/llvm/llvm-project/pull/114547
More information about the cfe-commits
mailing list