[llvm] [CGProfile] Use callee's PGO name when caller->callee is an indirect call. (PR #78610)

Mingming Liu via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 18 10:28:45 PST 2024


https://github.com/minglotus-6 created https://github.com/llvm/llvm-project/pull/78610

- With PGO, indirect call edges are constructed using value profiles, and the profile address is mapped to a function's PGO name.
- With ThinLTO, local functions [can be promoted](https://github.com/llvm/llvm-project/blob/2663d2cb9c9361f0b234c40a0f50c7ba0748eb26/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp#L288) to have external linkage; and with [full](https://github.com/llvm/llvm-project/blob/2663d2cb9c9361f0b234c40a0f50c7ba0748eb26/llvm/lib/LTO/LTO.cpp#L1328) or [thin](https://github.com/llvm/llvm-project/blob/2663d2cb9c9361f0b234c40a0f50c7ba0748eb26/llvm/lib/LTO/LTO.cpp#L448) LTO, global functions can be internalized. Edge construction should therefore use the PGO name a function had before its linkage was updated.

>From 90074a8722090a9144d987b10ee59ff37891c109 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 11 Jan 2024 12:46:09 -0800
Subject: [PATCH] [CGProfile] Use callee's PGO name when caller->callee is an
 indirect call.

- With PGO, indirect call edges are constructed using value profiles,
  and the profile address is mapped to a function's PGO name.
- With ThinLTO, local functions can be promoted to have external
  linkage; and with full or thin LTO, global functions can be internalized.
  Edge construction should therefore use the PGO name a function had
  before its linkage was updated.
---
 .../llvm/Transforms/Instrumentation/CGProfile.h |  4 ++++
 llvm/lib/Passes/PassBuilder.cpp                 |  4 ++++
 llvm/lib/Passes/PassBuilderPipelines.cpp        |  5 +++--
 llvm/lib/Passes/PassRegistry.def                |  5 ++++-
 .../Transforms/Instrumentation/CGProfile.cpp    |  8 ++++----
 llvm/test/Instrumentation/cgprofile.ll          | 17 ++++++++++++-----
 6 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h b/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h
index 9f9ce42277a0c12..0011b7b8b36c770 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/CGProfile.h
@@ -18,7 +18,11 @@ namespace llvm {
 class Module;
 class CGProfilePass : public PassInfoMixin<CGProfilePass> {
 public:
+  CGProfilePass(bool InLTO) : InLTO(InLTO) {}
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+
+private:
+  bool InLTO = false;
 };
 } // end namespace llvm
 
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 439f749bda8bb78..f5cd6c507a3d4ea 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -732,6 +732,10 @@ Expected<bool> parseGlobalDCEPassOptions(StringRef Params) {
   return parseSinglePassOption(Params, "vfe-linkage-unit-visibility", "GlobalDCE");
 }
 
+Expected<bool> parseCGProfilePassOptions(StringRef Params) {
+  return parseSinglePassOption(Params, "in-lto-post-link", "CGProfile");
+}
+
 Expected<bool> parseInlinerPassOptions(StringRef Params) {
   return parseSinglePassOption(Params, "only-mandatory", "InlinerPass");
 }
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 5c6c391049a7b25..525b83dee79b0dd 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1477,7 +1477,8 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
   MPM.addPass(ConstantMergePass());
 
   if (PTO.CallGraphProfile && !LTOPreLink)
-    MPM.addPass(CGProfilePass());
+    MPM.addPass(CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
+                              LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink));
 
   // TODO: Relative look table converter pass caused an issue when full lto is
   // enabled. See https://reviews.llvm.org/D94355 for more details.
@@ -1972,7 +1973,7 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
     MPM.addPass(MergeFunctionsPass());
 
   if (PTO.CallGraphProfile)
-    MPM.addPass(CGProfilePass());
+    MPM.addPass(CGProfilePass(/*InLTOPostLink=*/true));
 
   invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
 
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 82ce040c6496267..04e61a60addf796 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -47,7 +47,6 @@ MODULE_PASS("attributor", AttributorPass())
 MODULE_PASS("attributor-light", AttributorLightPass())
 MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
 MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
-MODULE_PASS("cg-profile", CGProfilePass())
 MODULE_PASS("check-debugify", NewPMCheckDebugifyPass())
 MODULE_PASS("constmerge", ConstantMergePass())
 MODULE_PASS("coro-cleanup", CoroCleanupPass())
@@ -150,6 +149,10 @@ MODULE_PASS_WITH_PARAMS(
     "asan", "AddressSanitizerPass",
     [](AddressSanitizerOptions Opts) { return AddressSanitizerPass(Opts); },
     parseASanPassOptions, "kernel")
+MODULE_PASS_WITH_PARAMS(
+    "cg-profile", "CGProfilePass",
+    [](bool InLTOPostLink) { return CGProfilePass(InLTOPostLink);},
+    parseCGProfilePassOptions, "in-lto-post-link")
 MODULE_PASS_WITH_PARAMS(
     "globaldce", "GlobalDCEPass",
     [](bool InLTOPostLink) { return GlobalDCEPass(InLTOPostLink); },
diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
index e2e5f21b376b035..c322d0abd6bc1d1 100644
--- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
+++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp
@@ -44,8 +44,8 @@ addModuleFlags(Module &M,
   return true;
 }
 
-static bool runCGProfilePass(
-    Module &M, FunctionAnalysisManager &FAM) {
+static bool runCGProfilePass(Module &M, FunctionAnalysisManager &FAM,
+                             bool InLTO) {
   MapVector<std::pair<Function *, Function *>, uint64_t> Counts;
   InstrProfSymtab Symtab;
   auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
@@ -59,7 +59,7 @@ static bool runCGProfilePass(
     Count = SaturatingAdd(Count, NewCount);
   };
   // Ignore error here.  Indirect calls are ignored if this fails.
-  (void)(bool) Symtab.create(M);
+  (void)(bool)Symtab.create(M, InLTO);
   for (auto &F : M) {
     // Avoid extra cost of running passes for BFI when the function doesn't have
     // entry count.
@@ -101,7 +101,7 @@ static bool runCGProfilePass(
 PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
   FunctionAnalysisManager &FAM =
       MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
-  runCGProfilePass(M, FAM);
+  runCGProfilePass(M, FAM, InLTO);
 
   return PreservedAnalyses::all();
 }
diff --git a/llvm/test/Instrumentation/cgprofile.ll b/llvm/test/Instrumentation/cgprofile.ll
index 2a523d33ae07726..72d10f6754c4875 100644
--- a/llvm/test/Instrumentation/cgprofile.ll
+++ b/llvm/test/Instrumentation/cgprofile.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -passes cg-profile -S | FileCheck %s
+; RUN: opt < %s -passes='cg-profile<in-lto-post-link>' -S | FileCheck %s --check-prefixes=CHECK,LTO
+; RUN: opt < %s -passes='cg-profile' -S | FileCheck %s --check-prefixes=CHECK,NOLTO --implicit-check-not="!{ptr @freq, ptr @func3.llvm.12345"
 
 declare void @b()
 
@@ -7,10 +8,14 @@ define void @a() !prof !1 {
   ret void
 }
 
+define void @func3.llvm.12345() !PGOFuncName !4 {
+  ret void
+}
+
 @foo = common global ptr null, align 8
 declare i32 @func1()
 declare i32 @func2()
-declare i32 @func3()
+
 declare i32 @func4()
 declare dllimport i32 @func5()
 declare i32 @func6()
@@ -29,15 +34,17 @@ B:
 
 !1 = !{!"function_entry_count", i64 32}
 !2 = !{!"branch_weights", i32 5, i32 10}
-!3 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 -6929281286627296573, i64 150, i64 -2545542355363006406, i64 10, i64 3667884930908592509, i64 1, i64 15435711456043681792, i64 0}
+!3 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 5415368997850289431, i64 150, i64 -2545542355363006406, i64 10, i64 3667884930908592509, i64 1, i64 15435711456043681792, i64 0}
+!4 = !{!"cgprofile.ll;func3"}
 
 ; CHECK: !llvm.module.flags = !{![[cgprof:[0-9]+]]}
 ; CHECK: ![[cgprof]] = !{i32 5, !"CG Profile", ![[prof:[0-9]+]]}
-; CHECK: ![[prof]] = distinct !{![[e0:[0-9]+]], ![[e1:[0-9]+]], ![[e2:[0-9]+]], ![[e3:[0-9]+]], ![[e4:[0-9]+]], ![[e5:[0-9]+]], ![[e6:[0-9]+]]}
+; LTO: ![[prof]] = distinct !{![[e0:[0-9]+]], ![[e1:[0-9]+]], ![[e2:[0-9]+]], ![[e3:[0-9]+]], ![[e4:[0-9]+]], ![[e5:[0-9]+]], ![[e6:[0-9]+]]}
+; NOLTO: ![[prof]] = distinct !{![[e0:[0-9]+]], ![[e1:[0-9]+]], ![[e2:[0-9]+]], ![[e4:[0-9]+]], ![[e5:[0-9]+]], ![[e6:[0-9]+]]}
 ; CHECK: ![[e0]] = !{ptr @a, ptr @b, i64 32}
 ; CHECK: ![[e1]] = !{ptr @freq, ptr @func4, i64 1030}
 ; CHECK: ![[e2]] = !{ptr @freq, ptr @func2, i64 410}
-; CHECK: ![[e3]] = !{ptr @freq, ptr @func3, i64 150}
+; LTO: ![[e3]] = !{ptr @freq, ptr @func3.llvm.12345, i64 150}
 ; CHECK: ![[e4]] = !{ptr @freq, ptr @func1, i64 10}
 ; CHECK: ![[e5]] = !{ptr @freq, ptr @a, i64 11}
 ; CHECK: ![[e6]] = !{ptr @freq, ptr @b, i64 21}



More information about the llvm-commits mailing list