[llvm] 4a2bf05 - Reapply "[ctx_prof] Fix the pre-thinlink "use" case (#102511)"

Mircea Trofin via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 8 17:05:23 PDT 2024


Author: Mircea Trofin
Date: 2024-08-08T17:04:00-07:00
New Revision: 4a2bf05980fd59013e74603f961d4a52cf8db940

URL: https://github.com/llvm/llvm-project/commit/4a2bf05980fd59013e74603f961d4a52cf8db940
DIFF: https://github.com/llvm/llvm-project/commit/4a2bf05980fd59013e74603f961d4a52cf8db940.diff

LOG: Reapply "[ctx_prof] Fix the pre-thinlink "use" case (#102511)"

This reverts commit 967185eeb85abb77bd6b6cdd2b026d5c54b7d4f3.

The problem was link dependencies, moved `UseCtxProfile` to `Analysis`.

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
    llvm/lib/Analysis/CtxProfAnalysis.cpp
    llvm/lib/Passes/PassBuilderPipelines.cpp
    llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
    llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
    llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
index 5256aff56205b..f127d16b8de12 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfLowering.h
@@ -19,7 +19,8 @@ class Type;
 class PGOCtxProfLoweringPass : public PassInfoMixin<PGOCtxProfLoweringPass> {
 public:
   explicit PGOCtxProfLoweringPass() = default;
-  static bool isContextualIRPGOEnabled();
+  // True if contextual instrumentation is enabled.
+  static bool isCtxIRPGOInstrEnabled();
 
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
 };

diff  --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index f56f910bd7784..fbae705127538 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -17,11 +17,17 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/ProfileData/PGOCtxProfReader.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/MemoryBuffer.h"
 
 #define DEBUG_TYPE "ctx_prof"
 
+using namespace llvm;
+cl::opt<std::string>
+    UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden,
+                  cl::desc("Use the specified contextual profile file"));
+
 namespace llvm {
 namespace json {
 Value toJSON(const PGOCtxProfContext &P) {
@@ -58,8 +64,6 @@ Value toJSON(const PGOCtxProfContext::CallTargetMapTy &P) {
 } // namespace json
 } // namespace llvm
 
-using namespace llvm;
-
 AnalysisKey CtxProfAnalysis::Key;
 
 CtxProfAnalysis::Result CtxProfAnalysis::run(Module &M,

diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index c175ee8980984..6927a2886b962 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -304,9 +304,7 @@ static cl::opt<bool> UseLoopVersioningLICM(
     "enable-loop-versioning-licm", cl::init(false), cl::Hidden,
     cl::desc("Enable the experimental Loop Versioning LICM pass"));
 
-cl::opt<std::string>
-    UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden,
-                  cl::desc("Use the specified contextual profile file"));
+extern cl::opt<std::string> UseCtxProfile;
 
 namespace llvm {
 extern cl::opt<bool> EnableMemProfContextDisambiguation;
@@ -1173,13 +1171,12 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   const bool IsMemprofUse = IsPGOPreLink && !PGOOpt->MemoryProfile.empty();
   // We don't want to mix pgo ctx gen and pgo gen; we also don't currently
   // enable ctx profiling from the frontend.
-  assert(
-      !(IsPGOInstrGen && PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) &&
-      "Enabling both instrumented FDO and contextual instrumentation is not "
-      "supported.");
+  assert(!(IsPGOInstrGen && PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled()) &&
+         "Enabling both instrumented PGO and contextual instrumentation is not "
+         "supported.");
   // Enable contextual profiling instrumentation.
   const bool IsCtxProfGen = !IsPGOInstrGen && IsPreLink &&
-                            PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+                            PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled();
   const bool IsCtxProfUse = !UseCtxProfile.empty() && !PGOOpt &&
                             Phase == ThinOrFullLTOPhase::ThinLTOPreLink;
 
@@ -1670,8 +1667,10 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
   // In pre-link, for ctx prof use, we stop here with an instrumented IR. We let
   // thinlto use the contextual info to perform imports; then use the contextual
   // profile in the post-thinlink phase.
-  if (!UseCtxProfile.empty() && !PGOOpt)
+  if (!UseCtxProfile.empty() && !PGOOpt) {
+    addRequiredLTOPreLinkPasses(MPM);
     return MPM;
+  }
 
   // Run partial inlining pass to partially inline functions that have
   // large bodies.

diff  --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
index de1d4d2381c06..d6ba12465bb32 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp
@@ -30,7 +30,7 @@ static cl::list<std::string> ContextRoots(
         "root of an interesting graph, which will be profiled independently "
         "from other similar graphs."));
 
-bool PGOCtxProfLoweringPass::isContextualIRPGOEnabled() {
+bool PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled() {
   return !ContextRoots.empty();
 }
 

diff  --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 1ce8f58c1aa14..41618194d12ed 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -321,6 +321,7 @@ static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
              " greater than this threshold."));
 
 extern cl::opt<unsigned> MaxNumVTableAnnotations;
+extern cl::opt<std::string> UseCtxProfile;
 
 namespace llvm {
 // Command line option to turn on CFG dot dump after profile annotation.
@@ -338,9 +339,12 @@ extern cl::opt<bool> EnableVTableProfileUse;
 extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
 } // namespace llvm
 
+bool shouldInstrumentForCtxProf() {
+  return PGOCtxProfLoweringPass::isCtxIRPGOInstrEnabled() ||
+         !UseCtxProfile.empty();
+}
 bool shouldInstrumentEntryBB() {
-  return PGOInstrumentEntry ||
-         PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+  return PGOInstrumentEntry || shouldInstrumentForCtxProf();
 }
 
 // FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
@@ -348,8 +352,7 @@ bool shouldInstrumentEntryBB() {
 // Supporting other values is relatively straight-forward - just another counter
 // range within the context.
 bool isValueProfilingDisabled() {
-  return DisableValueProfiling ||
-         PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+  return DisableValueProfiling || shouldInstrumentForCtxProf();
 }
 
 // Return a string describing the branch condition that can be
@@ -902,7 +905,7 @@ static void instrumentOneFunc(
   unsigned NumCounters =
       InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
 
-  if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) {
+  if (shouldInstrumentForCtxProf()) {
     auto *CSIntrinsic =
         Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
     // We want to count the instrumentable callsites, then instrument them. This
@@ -1861,7 +1864,7 @@ static bool InstrumentAllFunctions(
     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
   // For the context-sensitve instrumentation, we should have a separated pass
   // (before LTO/ThinLTO linking) to create these variables.
-  if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled())
+  if (!IsCS && !shouldInstrumentForCtxProf())
     createIRLevelProfileFlagVar(M, /*IsCS=*/false);
 
   Triple TT(M.getTargetTriple());
@@ -2112,7 +2115,7 @@ static bool annotateAllFunctions(
   bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
   if (PGOInstrumentEntry.getNumOccurrences() > 0)
     InstrumentFuncEntry = PGOInstrumentEntry;
-  InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
+  InstrumentFuncEntry |= shouldInstrumentForCtxProf();
 
   bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
   for (auto &F : M) {

diff  --git a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
index b50a815be5abf..18ac2f92aa39d 100644
--- a/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
+++ b/llvm/test/Transforms/PGOProfile/ctx-prof-use-prelink.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
 ; There is no profile, but that's OK because the prelink does not care about
 ; the content of the profile, just that we intend to use one.
 ; There is no scenario currently of doing ctx profile use without thinlto.
@@ -7,19 +7,22 @@
 
 declare void @bar()
 
+;.
+; CHECK: @__profn_foo = private constant [3 x i8] c"foo"
+;.
 define void @foo(i32 %a, ptr %fct) {
 ; CHECK-LABEL: define void @foo(
 ; CHECK-SAME: i32 [[A:%.*]], ptr [[FCT:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:    call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
 ; CHECK-NEXT:    [[T:%.*]] = icmp eq i32 [[A]], 0
 ; CHECK-NEXT:    br i1 [[T]], label %[[YES:.*]], label %[[NO:.*]]
 ; CHECK:       [[YES]]:
 ; CHECK-NEXT:    call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1)
-; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[FCT]] to i64
-; CHECK-NEXT:    call void @llvm.instrprof.value.profile(ptr @__profn_foo, i64 728453322856651412, i64 [[TMP1]], i32 0, i32 0)
+; CHECK-NEXT:    call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0, ptr [[FCT]])
 ; CHECK-NEXT:    call void [[FCT]](i32 0)
 ; CHECK-NEXT:    br label %[[EXIT:.*]]
 ; CHECK:       [[NO]]:
-; CHECK-NEXT:    call void @llvm.instrprof.increment(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 0)
+; CHECK-NEXT:    call void @llvm.instrprof.callsite(ptr @__profn_foo, i64 728453322856651412, i32 2, i32 1, ptr @bar)
 ; CHECK-NEXT:    call void @bar()
 ; CHECK-NEXT:    br label %[[EXIT]]
 ; CHECK:       [[EXIT]]:
@@ -36,3 +39,6 @@ no:
 exit:
   ret void
 }
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+;.


        


More information about the llvm-commits mailing list