[llvm] [CGProfile] Use callee's PGO name when caller->callee is an indirect call. (PR #78610)
Mingming Liu via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 18 10:28:45 PST 2024
https://github.com/minglotus-6 created https://github.com/llvm/llvm-project/pull/78610
- With PGO, indirect call edges are constructed using value profiles, and the profile address is mapped to a function's PGO name.
- With ThinLTO, local functions [could be promoted](https://github.com/llvm/llvm-project/blob/2663d2cb9c9361f0b234c40a0f50c7ba0748eb26/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp#L288) to have external linkage; and with [full](https://github.com/llvm/llvm-project/blob/2663d2cb9c9361f0b234c40a0f50c7ba0748eb26/llvm/lib/LTO/LTO.cpp#L1328) or [thin](https://github.com/llvm/llvm-project/blob/2663d2cb9c9361f0b234c40a0f50c7ba0748eb26/llvm/lib/LTO/LTO.cpp#L448) LTO, global functions could be internalized. Edge construction should therefore use the PGO name a function had before its linkage was updated.
From 90074a8722090a9144d987b10ee59ff37891c109 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 11 Jan 2024 12:46:09 -0800
Subject: [PATCH] [CGProfile] Use callee's PGO name when caller->callee is an
indirect call.
- With PGO, indirect call edges are constructed using value profiles,
and the profile address is mapped to a function's PGO name.
- With ThinLTO, local functions could be promoted to have external
linkage; and with full or thin LTO, global functions could be internalized.
Edge construction should therefore use the PGO name a function had
before its linkage was updated.
---
.../llvm/Transforms/Instrumentation/CGProfile.h | 4 ++++
llvm/lib/Passes/PassBuilder.cpp | 4 ++++
llvm/lib/Passes/PassBuilderPipelines.cpp | 5 +++--
llvm/lib/Passes/PassRegistry.def | 5 ++++-
.../Transforms/Instrumentation/CGProfile.cpp | 8 ++++----
llvm/test/Instrumentation/cgprofile.ll | 17 ++++++++++++-----
6 files changed, 31 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h b/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h
index 9f9ce42277a0c12..0011b7b8b36c770 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h
@@ -18,7 +18,11 @@ namespace llvm {
class Module;
class CGProfilePass : public PassInfoMixin<CGProfilePass> {
public:
+ CGProfilePass(bool InLTO) : InLTO(InLTO) {}
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+ bool InLTO = false;
};
} // end namespace llvm
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 439f749bda8bb78..f5cd6c507a3d4ea 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -732,6 +732,10 @@ Expected<bool> parseGlobalDCEPassOptions(StringRef Params) {
return parseSinglePassOption(Params, "vfe-linkage-unit-visibility", "GlobalDCE");
}
+Expected<bool> parseCGProfilePassOptions(StringRef Params) {
+ return parseSinglePassOption(Params, "in-lto-post-link", "CGProfile");
+}
+
Expected<bool> parseInlinerPassOptions(StringRef Params) {
return parseSinglePassOption(Params, "only-mandatory", "InlinerPass");
}
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 5c6c391049a7b25..525b83dee79b0dd 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1477,7 +1477,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
MPM.addPass(ConstantMergePass());
if (PTO.CallGraphProfile && !LTOPreLink)
- MPM.addPass(CGProfilePass());
+ MPM.addPass(CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
+ LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink));
// TODO: Relative look table converter pass caused an issue when full lto is
// enabled. See https://reviews.llvm.org/D94355 for more details.
@@ -1972,7 +1973,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
MPM.addPass(MergeFunctionsPass());
if (PTO.CallGraphProfile)
- MPM.addPass(CGProfilePass());
+ MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 82ce040c6496267..04e61a60addf796 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -47,7 +47,6 @@ MODULE_PASS("attributor", AttributorPass())
MODULE_PASS("attributor-light", AttributorLightPass())
MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
-MODULE_PASS("cg-profile", CGProfilePass())
MODULE_PASS("check-debugify", NewPMCheckDebugifyPass())
MODULE_PASS("constmerge", ConstantMergePass())
MODULE_PASS("coro-cleanup", CoroCleanupPass())
@@ -150,6 +149,10 @@ MODULE_PASS_WITH_PARAMS(
"asan", "AddressSanitizerPass",
[](AddressSanitizerOptions Opts) { return AddressSanitizerPass(Opts); },
parseASanPassOptions, "kernel")
+MODULE_PASS_WITH_PARAMS(
+ "cg-profile", "CGProfilePass",
+ [](bool InLTOPostLink) { return CGProfilePass(InLTOPostLink);},
+ parseCGProfilePassOptions, "in-lto-post-link")
MODULE_PASS_WITH_PARAMS(
"globaldce", "GlobalDCEPass",
[](bool InLTOPostLink) { return GlobalDCEPass(InLTOPostLink); },
diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index e2e5f21b376b035..c322d0abd6bc1d1 100644
--- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -44,8 +44,8 @@ addModuleFlags(Module &M,
return true;
}
-static bool runCGProfilePass(
- Module &M, FunctionAnalysisManager &FAM) {
+static bool runCGProfilePass(Module &M, FunctionAnalysisManager &FAM,
+ bool InLTO) {
MapVector<std::pair<Function *, Function *>, uint64_t> Counts;
InstrProfSymtab Symtab;
auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
@@ -59,7 +59,7 @@ static bool runCGProfilePass(
Count = SaturatingAdd(Count, NewCount);
};
// Ignore error here. Indirect calls are ignored if this fails.
- (void)(bool) Symtab.create(M);
+ (void)(bool)Symtab.create(M, InLTO);
for (auto &F : M) {
// Avoid extra cost of running passes for BFI when the function doesn't have
// entry count.
@@ -101,7 +101,7 @@ static bool runCGProfilePass(
PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
FunctionAnalysisManager &FAM =
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
- runCGProfilePass(M, FAM);
+ runCGProfilePass(M, FAM, InLTO);
return PreservedAnalyses::all();
}
diff --git a/llvm/test/Instrumentation/cgprofile.ll b/llvm/test/Instrumentation/cgprofile.ll
index 2a523d33ae07726..72d10f6754c4875 100644
--- a/llvm/test/Instrumentation/cgprofile.ll
+++ b/llvm/test/Instrumentation/cgprofile.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -passes cg-profile -S | FileCheck %s
+; RUN: opt < %s -passes='cg-profile<in-lto-post-link>' -S | FileCheck %s --check-prefixes=CHECK,LTO
+; RUN: opt < %s -passes='cg-profile' -S | FileCheck %s --check-prefixes=CHECK,NOLTO --implicit-check-not="!{ptr @freq, ptr @func3.llvm.12345"
declare void @b()
@@ -7,10 +8,14 @@ define void @a() !prof !1 {
ret void
}
+define void @func3.llvm.12345() !PGOFuncName !4 {
+ ret void
+}
+
@foo = common global ptr null, align 8
declare i32 @func1()
declare i32 @func2()
-declare i32 @func3()
+
declare i32 @func4()
declare dllimport i32 @func5()
declare i32 @func6()
@@ -29,15 +34,17 @@ B:
!1 = !{!"function_entry_count", i64 32}
!2 = !{!"branch_weights", i32 5, i32 10}
-!3 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 -6929281286627296573, i64 150, i64 -2545542355363006406, i64 10, i64 3667884930908592509, i64 1, i64 15435711456043681792, i64 0}
+!3 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 5415368997850289431, i64 150, i64 -2545542355363006406, i64 10, i64 3667884930908592509, i64 1, i64 15435711456043681792, i64 0}
+!4 = !{!"cgprofile.ll;func3"}
; CHECK: !llvm.module.flags = !{![[cgprof:[0-9]+]]}
; CHECK: ![[cgprof]] = !{i32 5, !"CG Profile", ![[prof:[0-9]+]]}
-; CHECK: ![[prof]] = distinct !{![[e0:[0-9]+]], ![[e1:[0-9]+]], ![[e2:[0-9]+]], ![[e3:[0-9]+]], ![[e4:[0-9]+]], ![[e5:[0-9]+]], ![[e6:[0-9]+]]}
+; LTO: ![[prof]] = distinct !{![[e0:[0-9]+]], ![[e1:[0-9]+]], ![[e2:[0-9]+]], ![[e3:[0-9]+]], ![[e4:[0-9]+]], ![[e5:[0-9]+]], ![[e6:[0-9]+]]}
+; NOLTO: ![[prof]] = distinct !{![[e0:[0-9]+]], ![[e1:[0-9]+]], ![[e2:[0-9]+]], ![[e4:[0-9]+]], ![[e5:[0-9]+]], ![[e6:[0-9]+]]}
; CHECK: ![[e0]] = !{ptr @a, ptr @b, i64 32}
; CHECK: ![[e1]] = !{ptr @freq, ptr @func4, i64 1030}
; CHECK: ![[e2]] = !{ptr @freq, ptr @func2, i64 410}
-; CHECK: ![[e3]] = !{ptr @freq, ptr @func3, i64 150}
+; LTO: ![[e3]] = !{ptr @freq, ptr @func3.llvm.12345, i64 150}
; CHECK: ![[e4]] = !{ptr @freq, ptr @func1, i64 10}
; CHECK: ![[e5]] = !{ptr @freq, ptr @a, i64 11}
; CHECK: ![[e6]] = !{ptr @freq, ptr @b, i64 21}
More information about the llvm-commits
mailing list