[llvm] b2d3c31 - [ctx_prof] Fix checks in `PGOCtxprofFlattening` (#108467)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 17 18:19:23 PDT 2024
Author: Mircea Trofin
Date: 2024-09-17T18:19:20-07:00
New Revision: b2d3c315d510f55ac7ae0ca06f362c709fcacd12
URL: https://github.com/llvm/llvm-project/commit/b2d3c315d510f55ac7ae0ca06f362c709fcacd12
DIFF: https://github.com/llvm/llvm-project/commit/b2d3c315d510f55ac7ae0ca06f362c709fcacd12.diff
LOG: [ctx_prof] Fix checks in `PGOCtxprofFlattening` (#108467)
The assertion that all out-edges of a BB can't be 0 is incorrect: they
can be, if that branch is on a cold subgraph.
Added validators and asserts about the expected proprerties of the
propagated counters.
Added:
llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
Modified:
llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
llvm/test/ThinLTO/X86/ctxprof.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
index fc78d8c60ec050..e76689e2f5f0a5 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Analysis.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
@@ -34,6 +35,7 @@
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
#include "llvm/Transforms/Scalar/DCE.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include <deque>
using namespace llvm;
@@ -51,6 +53,11 @@ class ProfileAnnotator final {
class BBInfo {
std::optional<uint64_t> Count;
+ // OutEdges is dimensioned to match the number of terminator operands.
+ // Entries in the vector match the index in the terminator operand list. In
+ // some cases - see `shouldExcludeEdge` and its implementation - an entry
+ // will be nullptr.
+ // InEdges doesn't have the above constraint.
SmallVector<EdgeInfo *> OutEdges;
SmallVector<EdgeInfo *> InEdges;
size_t UnknownCountOutEdges = 0;
@@ -58,22 +65,30 @@ class ProfileAnnotator final {
// Pass AssumeAllKnown when we try to propagate counts from edges to BBs -
// because all the edge counters must be known.
- uint64_t getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
- bool AssumeAllKnown) const {
- uint64_t Sum = 0;
- for (const auto *E : Edges)
- if (E)
- Sum += AssumeAllKnown ? *E->Count : E->Count.value_or(0U);
+ // Return std::nullopt if there were no edges to sum. The user can decide
+ // how to interpret that.
+ std::optional<uint64_t> getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
+ bool AssumeAllKnown) const {
+ std::optional<uint64_t> Sum;
+ for (const auto *E : Edges) {
+ // `Edges` may be `OutEdges`, case in which `E` could be nullptr.
+ if (E) {
+ if (!Sum.has_value())
+ Sum = 0;
+ *Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U));
+ }
+ }
return Sum;
}
- void computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
+ bool computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
assert(!Count.has_value());
Count = getEdgeSum(Edges, true);
+ return Count.has_value();
}
void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
- uint64_t KnownSum = getEdgeSum(Edges, false);
+ uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U);
uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U;
EdgeInfo *E = nullptr;
for (auto *I : Edges)
@@ -110,17 +125,15 @@ class ProfileAnnotator final {
}
bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) {
- if (!succ_empty(&BB) && !UnknownCountOutEdges) {
- computeCountFrom(OutEdges);
- return true;
+ if (!UnknownCountOutEdges) {
+ return computeCountFrom(OutEdges);
}
return false;
}
bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) {
- if (!BB.isEntryBlock() && !UnknownCountInEdges) {
- computeCountFrom(InEdges);
- return true;
+ if (!UnknownCountInEdges) {
+ return computeCountFrom(InEdges);
}
return false;
}
@@ -178,7 +191,7 @@ class ProfileAnnotator final {
bool KeepGoing = true;
while (KeepGoing) {
KeepGoing = false;
- for (const auto &BB : reverse(F)) {
+ for (const auto &BB : F) {
auto &Info = getBBInfo(BB);
if (!Info.hasCount())
KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) ||
@@ -198,6 +211,52 @@ class ProfileAnnotator final {
BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; }
+ const BBInfo &getBBInfo(const BasicBlock &BB) const {
+ return BBInfos.find(&BB)->second;
+ }
+
+ // validation function after we propagate the counters: all BBs and edges'
+ // counters must have a value.
+ bool allCountersAreAssigned() const {
+ for (const auto &BBInfo : BBInfos)
+ if (!BBInfo.second.hasCount())
+ return false;
+ for (const auto &EdgeInfo : EdgeInfos)
+ if (!EdgeInfo.Count.has_value())
+ return false;
+ return true;
+ }
+
+ /// Check that all paths from the entry basic block that use edges with
+ /// non-zero counts arrive at a basic block with no successors (i.e. "exit")
+ bool allTakenPathsExit() const {
+ std::deque<const BasicBlock *> Worklist;
+ DenseSet<const BasicBlock *> Visited;
+ Worklist.push_back(&F.getEntryBlock());
+ Visited.insert(&F.getEntryBlock());
+ while (!Worklist.empty()) {
+ const auto *BB = Worklist.front();
+ Worklist.pop_front();
+ if (succ_size(BB) <= 1)
+ continue;
+ const auto &BBInfo = getBBInfo(*BB);
+ bool Inserted = false;
+ for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) {
+ const auto *Succ = BB->getTerminator()->getSuccessor(I);
+ if (!shouldExcludeEdge(*BB, *Succ)) {
+ if (BBInfo.getEdgeCount(I) > 0)
+ if (Visited.insert(Succ).second) {
+ Worklist.push_back(Succ);
+ Inserted = true;
+ }
+ }
+ }
+ if (!Inserted)
+ return false;
+ }
+ return true;
+ }
+
public:
ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters,
InstrProfSummaryBuilder &PB)
@@ -216,6 +275,9 @@ class ProfileAnnotator final {
"profile is managed by IPO transforms");
(void)Index;
Count = Counters[Ins->getIndex()->getZExtValue()];
+ } else if (isa<UnreachableInst>(BB.getTerminator())) {
+ // The program presumably didn't crash.
+ Count = 0;
}
auto [It, Ins] =
BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}});
@@ -268,14 +330,16 @@ class ProfileAnnotator final {
PB.addInternalCount(EdgeCount);
}
- if (MaxCount == 0)
- F.getContext().emitError(
- "[ctx-prof] Encountered a BB with more than one successor, where "
- "all outgoing edges have a 0 count. This occurs in non-exiting "
- "functions (message pumps, usually) which are not supported in the "
- "contextual profiling case");
- setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
+ if (MaxCount != 0)
+ setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
}
+ assert(allCountersAreAssigned() &&
+ "Expected all counters have been assigned.");
+ assert(allTakenPathsExit() &&
+ "[ctx-prof] Encountered a BB with more than one successor, where "
+ "all outgoing edges have a 0 count. This occurs in non-exiting "
+ "functions (message pumps, usually) which are not supported in the "
+ "contextual profiling case");
}
};
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
new file mode 100644
index 00000000000000..7eea1c36afc37a
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-zero-path.ll
@@ -0,0 +1,55 @@
+; Check that flattened profile lowering handles cold subgraphs that end in "unreachable"
+; RUN: split-file %s %t
+; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
+; RUN: opt -passes=ctx-prof-flatten %t/example.ll -use-ctx-profile=%t/profile.ctxprofdata -S -o - | FileCheck %s
+
+; CHECK-LABEL: entry:
+; CHECK: br i1 %t, label %yes, label %no, !prof ![[C1:[0-9]+]]
+; CHECK-LABEL: no:
+; CHECK-NOT: !prof
+; CHECK-LABEL: no1:
+; CHECK-NOT: !prof
+; CHECK-LABEL: no2:
+; CHECK-NOT: !prof
+; CHECK-LABEL: yes:
+; CHECK: br i1 %t3, label %yes1, label %yes2, !prof ![[C1]]
+; CHECK-NOT: !prof
+; CHECK: ![[C1]] = !{!"branch_weights", i32 6, i32 0}
+
+;--- example.ll
+define void @f1(i32 %cond) !guid !0 {
+entry:
+ call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 0)
+ %t = icmp eq i32 %cond, 1
+ br i1 %t, label %yes, label %no
+
+no:
+ %t2 = icmp eq i32 %cond, 2
+ br i1 %t2, label %no1, label %no2
+no1:
+ unreachable
+no2:
+ call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 1)
+ unreachable
+yes:
+ %t3 = icmp eq i32 %cond, 3
+ br i1 %t3, label %yes1, label %yes2
+yes1:
+ br label %exit
+yes2:
+ call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 2)
+ %t4 = icmp eq i32 %cond, 4
+ br i1 %t4, label %yes3, label %yes4
+yes3:
+ br label %exit
+yes4:
+ call void @llvm.instrprof.increment(ptr @f1, i64 42, i32 42, i32 3)
+ unreachable
+exit:
+ ret void
+}
+
+!0 = !{i64 1234}
+
+;--- profile.json
+[{"Guid":1234, "Counters":[6,0,0,0]}]
diff --git a/llvm/test/ThinLTO/X86/ctxprof.ll b/llvm/test/ThinLTO/X86/ctxprof.ll
index 1e30b90ec23d58..4baea3b25890eb 100644
--- a/llvm/test/ThinLTO/X86/ctxprof.ll
+++ b/llvm/test/ThinLTO/X86/ctxprof.ll
@@ -47,18 +47,21 @@
; NOPROF-1-NOT: m2_f1()
; NOPROF-2-NOT: m1_f1()
;
-; The run with workload definitions - same other options.
+; The run with workload definitions - same other options. We do need to re-generate the .bc
+; files, to include instrumentation.
+; RUN: opt -module-summary -passes=assign-guid,ctx-instr-gen %t/m1.ll -o %t/m1-instr.bc
+; RUN: opt -module-summary -passes=assign-guid,ctx-instr-gen %t/m2.ll -o %t/m2-instr.bc
;
; RUN: echo '[ \
; RUN: {"Guid": 6019442868614718803, "Counters": [1], "Callsites": [[{"Guid": 15593096274670919754, "Counters": [1]}]]}, \
; RUN: {"Guid": 15593096274670919754, "Counters": [1], "Callsites": [[{"Guid": 6019442868614718803, "Counters": [1]}]]} \
; RUN: ]' > %t_exp/ctxprof.json
; RUN: llvm-ctxprof-util fromJSON --input %t_exp/ctxprof.json --output %t_exp/ctxprof.bitstream
-; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc \
+; RUN: llvm-lto2 run %t/m1-instr.bc %t/m2-instr.bc \
; RUN: -o %t_exp/result.o -save-temps \
; RUN: -use-ctx-profile=%t_exp/ctxprof.bitstream \
-; RUN: -r %t/m1.bc,m1_f1,plx \
-; RUN: -r %t/m2.bc,m2_f1,plx
+; RUN: -r %t/m1-instr.bc,m1_f1,plx \
+; RUN: -r %t/m2-instr.bc,m2_f1,plx
; RUN: llvm-dis %t_exp/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=FIRST
; RUN: llvm-dis %t_exp/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=SECOND
;
More information about the llvm-commits
mailing list