[llvm] ba4da5a - [ctx_prof] "Use" support for pre-thinlink. (#101338)

Fri Aug 2 17:51:31 PDT 2024

Author: Mircea Trofin
Date: 2024-08-02T20:51:27-04:00
New Revision: ba4da5a087f28c9522bc7f173e99673bb3009af9

URL: https://github.com/llvm/llvm-project/commit/ba4da5a087f28c9522bc7f173e99673bb3009af9
DIFF: https://github.com/llvm/llvm-project/commit/ba4da5a087f28c9522bc7f173e99673bb3009af9.diff

LOG: [ctx_prof] "Use" support for pre-thinlink. (#101338)

There is currently no plan to support contextual profiling use in a non-
ThinLTO scenario.

In the pre-link phase, we only instrument and then immediately bail out
to let the linker group functions under an entrypoint in the same module
as the entrypoint. We don't actually care what the profile contains -
just that we want to use a contextual profile.

After that, in post-thinlink, we require the profile be passed again so
we can actually use it. The earlier instrumentation will be used to
match counter values.

While the feature is in development, we add a hidden flag for the use
scenario, but we can eventually tie it to the `PGOOptions` mechanism. We
will use the same flag in both pre- and post-thinlink, because it
simplifies things - usually the post-thinlink args are the same as the
ones for pre-. This, despite the flag being basically treated as a
boolean in pre-thinlink.

Added: 
    llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll

Modified: 
    llvm/lib/Passes/PassBuilderPipelines.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index a6118726945e8..adebbb5eeba32 100644

--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -304,6 +304,10 @@ static cl::opt<bool> UseLoopVersioningLICM(
     "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
     cl::desc("Enable the experimental Loop Versioning LICM pass"));
 
+static cl::opt<std::string>
+    UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden,
+                  cl::desc("Use the specified contextual profile file"));
+
 namespace llvm {
 extern cl::opt<bool> EnableMemProfContextDisambiguation;
 
@@ -1176,8 +1180,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   // Enable contextual profiling instrumentation.
   const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
                             PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+  const bool IsCtxProfUse = !UseCtxProfile.empty() && !PGOOpt &&
+                            Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
 
-  if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen)
+  if (IsPGOInstrGen || IsPGOInstrUse || IsMemprofUse || IsCtxProfGen ||
+      IsCtxProfUse)
     addPreInlinerPasses(MPM, Level, Phase);
 
   // Add all the requested passes for instrumentation PGO, if requested.
@@ -1187,8 +1194,13 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
                       /*IsCS=*/false, PGOOpt->AtomicCounterUpdate,
                       PGOOpt->ProfileFile, PGOOpt->ProfileRemappingFile,
                       PGOOpt->FS);
-  } else if (IsCtxProfGen) {
+  } else if (IsCtxProfGen || IsCtxProfUse) {
     MPM.addPass(PGOInstrumentationGen(false));
+    // In pre-link, we just want the instrumented IR. We use the contextual
+    // profile in the post-thinlink phase.
+    // The instrumentation will be removed in post-thinlink after IPO.
+    if (IsCtxProfUse)
+      return MPM;
     addPostPGOLoopRotation(MPM, Level);
     MPM.addPass(PGOCtxProfLoweringPass());
   }
@@ -1655,6 +1667,11 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
   // can.
   MPM.addPass(buildModuleSimplificationPipeline(
       Level, ThinOrFullLTOPhase::ThinLTOPreLink));
+  // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
+  // thinlto use the contextual info to perform imports; then use the contextual
+  // profile in the post-thinlink phase.
+  if (!UseCtxProfile.empty() && !PGOOpt)
+    return MPM;
 
   // Run partial inlining pass to partially inline functions that have
   // large bodies.

diff  --git a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
new file mode 100644
index 0000000000000..b50a815be5abf
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; There is no profile, but that's OK because the prelink does not care about
+; the content of the profile, just that we intend to use one.
+; There is no scenario currently of doing ctx profile use without thinlto.
+;
+; RUN: opt -passes='thinlto-pre-link<O2>' -use-ctx-profile=something_that_does_not_exist %s -S | FileCheck %s
+
+declare void @bar()
+
+define void @foo(i32 %a, ptr %fct) {
+; CHECK-LABEL: define void @foo(
+; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    [[T:%.*]] = icmp eq i32 [[A]], 0
+; CHECK-NEXT:    br i1 [[T]], label %[[YES:.*]], label %[[NO:.*]]
+; CHECK:       [[YES]]:
+; CHECK-NEXT:    call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[FCT]] to i64
+; CHECK-NEXT:    call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 728453322856651412, i64 [[TMP1]], i32 0, i32 0)
+; CHECK-NEXT:    call void [[FCT]](i32 0)
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[NO]]:
+; CHECK-NEXT:    call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
+; CHECK-NEXT:    call void @bar()
+; CHECK-NEXT:    br label %[[EXIT]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+  %t = icmp eq i32 %a, 0
+  br i1 %t, label %yes, label %no
+yes:
+  call void %fct(i32 %a)
+  br label %exit
+no:
+  call void @bar()
+  br label %exit
+exit:
+  ret void
+}