[llvm-branch-commits] [llvm] [ctx_prof] Handle `select` and its `step` instrumentation (PR #109185)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Sep 18 12:59:27 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-pgo

Author: Mircea Trofin (mtrofin)

<details>
<summary>Changes</summary>

The `step` instrumentation shouldn't be treated, during use, like an `increment`. The latter is treated as a BB ID. The step isn't that, it's more of a type of value profiling. We need to distinguish between the 2 when really looking for BB IDs (==increments), and handle appropriately `step`s. In particular, we need to know when to elide them because `select`s may get elided by function cloning, if the condition of the select is statically known.

---
Full diff: https://github.com/llvm/llvm-project/pull/109185.diff


5 Files Affected:

- (modified) llvm/include/llvm/Analysis/CtxProfAnalysis.h (+3) 
- (modified) llvm/lib/Analysis/CtxProfAnalysis.cpp (+9) 
- (modified) llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp (+43-2) 
- (modified) llvm/lib/Transforms/Utils/InlineFunction.cpp (+9-1) 
- (added) llvm/test/Analysis/CtxProfAnalysis/handle-select.ll (+76) 


``````````diff
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index b3e64b26ee543c..0a5beb92fcbcc0 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -117,6 +117,9 @@ class CtxProfAnalysis : public AnalysisInfoMixin<CtxProfAnalysis> {
 
   /// Get the instruction instrumenting a BB, or nullptr if not present.
   static InstrProfIncrementInst *getBBInstrumentation(BasicBlock &BB);
+
+  /// Get the step instrumentation associated with a `select`
+  static InstrProfIncrementInstStep *getSelectInstrumentation(SelectInst &SI);
 };
 
 class CtxProfAnalysisPrinterPass
diff --git a/llvm/lib/Analysis/CtxProfAnalysis.cpp b/llvm/lib/Analysis/CtxProfAnalysis.cpp
index 3df72983862d98..7517011395a7d6 100644
--- a/llvm/lib/Analysis/CtxProfAnalysis.cpp
+++ b/llvm/lib/Analysis/CtxProfAnalysis.cpp
@@ -254,6 +254,15 @@ InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
   return nullptr;
 }
 
+InstrProfIncrementInstStep *
+CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
+  Instruction *Prev = &SI;
+  while ((Prev = Prev->getPrevNode()))
+    if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
+      return Step;
+  return nullptr;
+}
+
 template <class ProfilesTy, class ProfTy>
 static void preorderVisit(ProfilesTy &Profiles,
                           function_ref<void(ProfTy &)> Visitor,
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index 91f950e2ba4c3e..30bb251364fdef 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -154,6 +154,8 @@ class ProfileAnnotator final {
 
     bool hasCount() const { return Count.has_value(); }
 
+    uint64_t getCount() const { return *Count;}
+
     bool trySetSingleUnknownInEdgeCount() {
       if (UnknownCountInEdges == 1) {
         setSingleUnknownEdgeCount(InEdges);
@@ -266,6 +268,21 @@ class ProfileAnnotator final {
     return HitExit;
   }
 
+  bool allNonColdSelectsHaveProfile() const {
+    for (const auto &BB : F) {
+      if (getBBInfo(BB).getCount() > 0) {
+        for (const auto &I : BB) {
+          if (const auto *SI = dyn_cast<SelectInst>(&I)) {
+            if (!SI->getMetadata(LLVMContext::MD_prof)) {
+              return false;
+            }
+          }
+        }
+      }
+    }
+    return true;
+  }
+
 public:
   ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters,
                    InstrProfSummaryBuilder &PB)
@@ -324,12 +341,33 @@ class ProfileAnnotator final {
     PB.addEntryCount(Counters[0]);
 
     for (auto &BB : F) {
+      const auto &BBInfo = getBBInfo(BB);
+      if (BBInfo.getCount() > 0) {
+        for (auto &I : BB) {
+          if (auto *SI = dyn_cast<SelectInst>(&I)) {
+            if (auto *Step = CtxProfAnalysis::getSelectInstrumentation(*SI)) {
+              auto Index = Step->getIndex()->getZExtValue();
+              assert(Index < Counters.size() &&
+                    "The index of the step instruction must be inside the "
+                    "counters vector by "
+                    "construction - tripping this assertion indicates a bug in "
+                    "how the contextual profile is managed by IPO transforms");
+              auto TotalCount = BBInfo.getCount();
+              auto TrueCount = Counters[Index];
+              auto FalseCount =
+                  (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
+              setProfMetadata(F.getParent(), SI, {TrueCount, FalseCount},
+                              std::max(TrueCount, FalseCount));
+            }
+          }
+        }
+      }
       if (succ_size(&BB) < 2)
         continue;
       auto *Term = BB.getTerminator();
       SmallVector<uint64_t, 2> EdgeCounts(Term->getNumSuccessors(), 0);
       uint64_t MaxCount = 0;
-      const auto &BBInfo = getBBInfo(BB);
+
       for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;
            ++SuccIdx) {
         uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
@@ -343,12 +381,15 @@ class ProfileAnnotator final {
         setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
     }
     assert(allCountersAreAssigned() &&
-           "Expected all counters have been assigned.");
+           "[ctx-prof] Expected all counters have been assigned.");
     assert(allTakenPathsExit() &&
            "[ctx-prof] Encountered a BB with more than one successor, where "
            "all outgoing edges have a 0 count. This occurs in non-exiting "
            "functions (message pumps, usually) which are not supported in the "
            "contextual profiling case");
+    assert(allNonColdSelectsHaveProfile() &&
+           "[ctx-prof] All non-cold select instructions were expected to have "
+           "a profile.");
   }
 };
 
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 2e05fa80464b8d..257850f7e6d2af 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -2211,7 +2211,15 @@ remapIndices(Function &Caller, BasicBlock *StartBB,
     }
     for (auto &I : llvm::make_early_inc_range(*BB)) {
       if (auto *Inc = dyn_cast<InstrProfIncrementInst>(&I)) {
-        if (Inc != BBID) {
+        if (isa<InstrProfIncrementInstStep>(Inc)) {
+          if (isa<Constant>(Inc->getStep())) {
+            assert(!Inc->getNextNode() || !isa<SelectInst>(Inc->getNextNode()));
+            Inc->eraseFromParent();
+          } else {
+            assert(isa_and_nonnull<SelectInst>(Inc->getNextNode()));
+            RewriteInstrIfNeeded(*Inc);
+          }
+        } else if (Inc != BBID) {
           // If we're here it means that the BB had more than 1 IDs, presumably
           // some coming from the callee. We "made up our mind" to keep the
           // first one (which may or may not have been originally the caller's).
diff --git a/llvm/test/Analysis/CtxProfAnalysis/handle-select.ll b/llvm/test/Analysis/CtxProfAnalysis/handle-select.ll
new file mode 100644
index 00000000000000..e740466a03f3e9
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/handle-select.ll
@@ -0,0 +1,76 @@
+; Check that we handle `step` instrumentations. These addorn `select`s.
+; We don't want to confuse the `step` with normal increments, the latter of which
+; we use for BB ID-ing: we want to keep the `step`s after inlining, except if
+; the `select` is elided.
+;
+; RUN: split-file %s %t
+; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
+;
+; RUN: opt -passes=ctx-instr-gen %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=INSTR
+; RUN: opt -passes=ctx-instr-gen,module-inline %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=POST-INL
+; RUN: opt -passes=ctx-instr-gen,module-inline,ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s --check-prefix=FLATTEN
+
+; INSTR-LABEL: yes:
+; INSTR-NEXT:   call void @llvm.instrprof.increment(ptr @foo, i64 [[#]], i32 2, i32 1)
+; INSTR-NEXT:   call void @llvm.instrprof.callsite(ptr @foo, i64 [[#]], i32 2, i32 0, ptr @bar)
+
+; INSTR-LABEL: no:
+; INSTR-NEXT:   call void @llvm.instrprof.callsite(ptr @foo, i64 [[#]], i32 2, i32 1, ptr @bar)
+
+; INSTR-LABEL: define i32 @bar
+; INSTR-NEXT:   call void @llvm.instrprof.increment(ptr @bar, i64 [[#]], i32 2, i32 0)
+; INSTR-NEXT:   %inc =
+; INSTR:        %test = icmp eq i32 %t, 0
+; INSTR-NEXT:   %1  = zext i1 %test to i64
+; INSTR-NEXT:   call void @llvm.instrprof.increment.step(ptr @bar, i64 [[#]], i32 2, i32 1, i64 %1)
+; INSTR-NEXT:   %res = select
+
+; POST-INL-LABEL: yes:
+; POST-INL-NEXT:   call void @llvm.instrprof.increment
+; POST-INL:        call void @llvm.instrprof.increment.step
+; POST-INL-NEXT:   %res.i = select
+
+; POST-INL-LABEL: no:
+; POST-INL-NEXT:   call void @llvm.instrprof.increment
+; POST-INL-NEXT:   br label
+
+; POST-INL-LABEL: exit:
+; POST-INL-NEXT:   %res = phi i32 [ %res.i, %yes ], [ 1, %no ]
+
+; FLATTEN-LABEL: yes:
+; FLATTEN:          %res.i = select i1 %test.i, i32 %inc.i, i32 %dec.i, !prof ![[SELPROF:[0-9]+]]
+; FLATTEN-LABEL: no:
+;
+; See the profile, in the "yes" case we set the step counter's value, in @bar, to 3. The total
+; entry count of that BB is 4.
+; ![[SELPROF]] = !{!"branch_weights", i32 3, i32 1}
+
+;--- example.ll
+define i32 @foo(i32 %t) !guid !0 {
+  %test = icmp slt i32 %t, 0
+  br i1 %test, label %yes, label %no
+yes:
+  %res1 = call i32 @bar(i32 %t) alwaysinline
+  br label %exit
+no:
+  ; this will result in eliding the select in @bar, when inlined.
+  %res2 = call i32 @bar(i32 0) alwaysinline
+  br label %exit
+exit:
+  %res = phi i32 [%res1, %yes], [%res2, %no]
+  ret i32 %res
+}
+
+define i32 @bar(i32 %t) !guid !1 {
+  %inc = add i32 %t, 1
+  %dec = sub i32 %t, 1
+  %test = icmp eq i32 %t, 0
+  %res = select i1 %test, i32 %inc, i32 %dec
+  ret i32 %res
+}
+
+!0 = !{i64 1234}
+!1 = !{i64 5678}
+
+;--- profile.json
+[{"Guid":1234, "Counters":[10, 4], "Callsites":[[{"Guid": 5678, "Counters":[4,3]}],[{"Guid": 5678, "Counters":[6,6]}]]}]

``````````

</details>


https://github.com/llvm/llvm-project/pull/109185


More information about the llvm-branch-commits mailing list