[llvm-branch-commits] [llvm] [ctx_prof] Flattened profile lowering pass (PR #107329)
Mircea Trofin via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Sep 5 14:04:18 PDT 2024
https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/107329
>From 856568c07d924dd59aaa81450cb8bcb64d60d2eb Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Tue, 3 Sep 2024 21:28:05 -0700
Subject: [PATCH] [ctx_prof] Flattened profile lowering pass
---
llvm/include/llvm/ProfileData/ProfileCommon.h | 6 +-
.../Instrumentation/PGOCtxProfFlattening.h | 25 ++
llvm/lib/Passes/PassBuilder.cpp | 1 +
llvm/lib/Passes/PassBuilderPipelines.cpp | 1 +
llvm/lib/Passes/PassRegistry.def | 1 +
.../Transforms/Instrumentation/CMakeLists.txt | 1 +
.../Instrumentation/PGOCtxProfFlattening.cpp | 341 ++++++++++++++++++
.../flatten-always-removes-instrumentation.ll | 12 +
.../CtxProfAnalysis/flatten-and-annotate.ll | 112 ++++++
9 files changed, 497 insertions(+), 3 deletions(-)
create mode 100644 llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
create mode 100644 llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
create mode 100644 llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll
create mode 100644 llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
diff --git a/llvm/include/llvm/ProfileData/ProfileCommon.h b/llvm/include/llvm/ProfileData/ProfileCommon.h
index eaab59484c947a..edd8e1f644ad12 100644
--- a/llvm/include/llvm/ProfileData/ProfileCommon.h
+++ b/llvm/include/llvm/ProfileData/ProfileCommon.h
@@ -79,13 +79,13 @@ class ProfileSummaryBuilder {
class InstrProfSummaryBuilder final : public ProfileSummaryBuilder {
uint64_t MaxInternalBlockCount = 0;
- inline void addEntryCount(uint64_t Count);
- inline void addInternalCount(uint64_t Count);
-
public:
InstrProfSummaryBuilder(std::vector<uint32_t> Cutoffs)
: ProfileSummaryBuilder(std::move(Cutoffs)) {}
+ void addEntryCount(uint64_t Count);
+ void addInternalCount(uint64_t Count);
+
void addRecord(const InstrProfRecord &);
std::unique_ptr<ProfileSummary> getSummary();
};
diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
new file mode 100644
index 00000000000000..0eab3aaf6fcad3
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
@@ -0,0 +1,25 @@
+//===-- PGOCtxProfFlattening.h - Contextual Instr. Flattening ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PGOCtxProfFlatteningPass class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFFLATTENING_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFFLATTENING_H
+
+#include "llvm/IR/PassManager.h"
+namespace llvm {
+
+/// Module pass registered as "ctx-prof-flatten". Its run() method is defined in
+/// PGOCtxProfFlattening.cpp, which describes it as flattening the contextual
+/// profile, lowering it to MD_prof, and removing contextual instrumentation.
+class PGOCtxProfFlatteningPass
+ : public PassInfoMixin<PGOCtxProfFlatteningPass> {
+public:
+ explicit PGOCtxProfFlatteningPass() = default;
+ /// Entry point invoked by the pass manager for module \p M.
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+};
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index a22abed8051a11..d87e64eff08966 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -198,6 +198,7 @@
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
+#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 1fd7ef929c87d5..38297dc02b8be6 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -76,6 +76,7 @@
#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
+#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index d6067089c6b5c1..2b0624cb9874da 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -58,6 +58,7 @@ MODULE_PASS("coro-early", CoroEarlyPass())
MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
MODULE_PASS("ctx-instr-gen",
PGOInstrumentationGen(PGOInstrumentationType::CTXPROF))
+MODULE_PASS("ctx-prof-flatten", PGOCtxProfFlatteningPass())
MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
MODULE_PASS("debugify", NewPMDebugifyPass())
MODULE_PASS("dfsan", DataFlowSanitizerPass())
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index deab37801ff1df..d45b07447d09da 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_component_library(LLVMInstrumentation
InstrProfiling.cpp
KCFI.cpp
LowerAllowCheckPass.cpp
+ PGOCtxProfFlattening.cpp
PGOCtxProfLowering.cpp
PGOForceFunctionAttrs.cpp
PGOInstrumentation.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
new file mode 100644
index 00000000000000..2139519a883ac7
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -0,0 +1,341 @@
+//===- PGOCtxProfFlattening.cpp - Contextual Instr. Flattening ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Flattens the contextual profile and lowers it to MD_prof.
+// This should happen after all IPO (which is assumed to have maintained the
+// contextual profile) happened. Flattening consists of summing the values at
+// the same index of the counters belonging to all the contexts of a function.
+// The lowering consists of materializing the counter values to function
+// entrypoint counts and branch probabilities.
+//
+// This pass also removes contextual instrumentation, which has been kept around
+// to facilitate its functionality.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfileSummary.h"
+#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Scalar/DCE.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+namespace {
+
+class ProfileAnnotator final {
+ class BBInfo;
+ struct EdgeInfo {
+ BBInfo *const Src;
+ BBInfo *const Dest;
+ std::optional<uint64_t> Count;
+
+ explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}
+ };
+
+ class BBInfo {
+ std::optional<uint64_t> Count;
+ SmallVector<EdgeInfo *> OutEdges;
+ SmallVector<EdgeInfo *> InEdges;
+ size_t UnknownCountOutEdges = 0;
+ size_t UnknownCountInEdges = 0;
+
+ /// Adds up the counts of the non-null entries of \p Edges. When
+ /// \p AssumeAllKnown is set, each edge's count is dereferenced
+ /// unconditionally (the caller vouches that it is present); otherwise an
+ /// edge without a count contributes 0.
+ uint64_t getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
+                     bool AssumeAllKnown) const {
+   uint64_t Total = 0;
+   for (const EdgeInfo *Edge : Edges) {
+     // Null entries are slots for which no edge was registered.
+     if (!Edge)
+       continue;
+     if (AssumeAllKnown)
+       Total += *Edge->Count;
+     else
+       Total += Edge->Count.value_or(0U);
+   }
+   return Total;
+ }
+
+ void takeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
+ assert(!Count.has_value());
+ Count = getEdgeSum(Edges, true);
+ }
+
+ /// Assigns a count to the single edge in \p Edges that does not have one yet:
+ /// this block's count minus the sum of the already-known edge counts, clamped
+ /// at 0. Also decrements the unknown-edge tallies of the edge's source and
+ /// destination blocks.
+ ///
+ /// Preconditions (checked by assertions): this block has a count, and exactly
+ /// one non-null edge in \p Edges has no count.
+ void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
+   assert(Count.has_value() &&
+          "Cannot derive an edge count for a block without a count");
+   uint64_t KnownSum = getEdgeSum(Edges, false);
+   uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U;
+   EdgeInfo *E = nullptr;
+   for (auto *I : Edges)
+     if (I && !I->Count.has_value()) {
+       // Check *before* recording the edge: asserting after `E = I` would
+       // compare E against itself and could never detect a second unknown.
+       assert(!E && "Expected exactly one edge to have an unknown count, "
+                    "found a second one");
+       E = I;
+#ifdef NDEBUG
+       // Nothing is verified without assertions, so stop at the first match.
+       break;
+#endif
+     }
+   assert(E && "Expected exactly one edge to have an unknown count");
+   assert(!E->Count.has_value());
+   E->Count = EdgeVal;
+   assert(E->Src->UnknownCountOutEdges > 0);
+   assert(E->Dest->UnknownCountInEdges > 0);
+   --E->Src->UnknownCountOutEdges;
+   --E->Dest->UnknownCountInEdges;
+ }
+
+ public:
+ BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional<uint64_t> Count)
+ : Count(Count) {
+ InEdges.reserve(NumInEdges);
+ OutEdges.resize(NumOutEdges);
+ }
+
+ bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) {
+ if (!succ_empty(&BB) && !UnknownCountOutEdges) {
+ takeCountFrom(OutEdges);
+ return true;
+ }
+ return false;
+ }
+
+ bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) {
+ if (!BB.isEntryBlock() && !UnknownCountInEdges) {
+ takeCountFrom(InEdges);
+ return true;
+ }
+ return false;
+ }
+
+ void addInEdge(EdgeInfo *Info) {
+ InEdges.push_back(Info);
+ ++UnknownCountInEdges;
+ }
+
+ void addOutEdge(size_t Index, EdgeInfo *Info) {
+ OutEdges[Index] = Info;
+ ++UnknownCountOutEdges;
+ }
+
+ bool hasCount() const { return Count.has_value(); }
+
+ bool trySetSingleUnknownInEdgeCount() {
+ if (UnknownCountInEdges == 1) {
+ setSingleUnknownEdgeCount(InEdges);
+ return true;
+ }
+ return false;
+ }
+
+ bool trySetSingleUnknownOutEdgeCount() {
+ if (UnknownCountOutEdges == 1) {
+ setSingleUnknownEdgeCount(OutEdges);
+ return true;
+ }
+ return false;
+ }
+ size_t getNumOutEdges() const { return OutEdges.size(); }
+
+ uint64_t getEdgeCount(size_t Index) const {
+ if (auto *E = OutEdges[Index])
+ return *E->Count;
+ return 0U;
+ }
+ };
+
+ Function &F;
+ const SmallVectorImpl<uint64_t> &Counters;
+ // To be accessed through getBBInfo() after construction.
+ std::map<const BasicBlock *, BBInfo> BBInfos;
+ std::vector<EdgeInfo> EdgeInfos;
+ InstrProfSummaryBuilder &PB;
+
+ // This is an adaptation of PGOUseFunc::populateCounters.
+ // FIXME(mtrofin): look into factoring the code to share one implementation.
+ //
+ // Sweeps the function's blocks in reverse order, over and over, until a whole
+ // sweep derives nothing new. In each sweep, a block without a count tries to
+ // take one from its out-edges first and its in-edges second; a block with a
+ // count then tries to resolve its one remaining unknown out-edge and in-edge.
+ // Note that the Counters parameter is not read by this function.
+ void propagateCounterValues(const SmallVectorImpl<uint64_t> &Counters) {
+   bool MadeProgress;
+   do {
+     MadeProgress = false;
+     for (const auto &BB : reverse(F)) {
+       auto &Info = getBBInfo(BB);
+       if (!Info.hasCount()) {
+         // The in-edges are consulted only if the out-edges did not help.
+         if (Info.tryTakeCountFromKnownOutEdges(BB) ||
+             Info.tryTakeCountFromKnownInEdges(BB))
+           MadeProgress = true;
+       }
+       if (!Info.hasCount())
+         continue;
+       // Both directions are always attempted, independently of each other.
+       if (Info.trySetSingleUnknownOutEdgeCount())
+         MadeProgress = true;
+       if (Info.trySetSingleUnknownInEdgeCount())
+         MadeProgress = true;
+     }
+   } while (MadeProgress);
+ }
+ // The only criterion for exclusion is faux suspend -> exit edges in presplit
+ // coroutines. The API serves for readability, currently.
+ bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const {
+ return llvm::isPresplitCoroSuspendExitEdge(Src, Dest);
+ }
+
+ BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; }
+
+public:
+ /// Builds the per-block and per-edge bookkeeping for \p F.
+ ///
+ /// A block's count is seeded from \p Counters when the block carries
+ /// instrumentation (the instrumentation's index selects the counter); other
+ /// blocks start without a count. One EdgeInfo is created per successor edge,
+ /// except for edges rejected by shouldExcludeEdge, whose out-edge slot stays
+ /// null. \p PB is stored for later use.
+ ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters,
+                  InstrProfSummaryBuilder &PB)
+     : F(F), Counters(Counters), PB(PB) {
+   assert(!F.isDeclaration());
+   assert(!Counters.empty());
+   size_t NrEdges = 0;
+   for (const auto &BB : F) {
+     std::optional<uint64_t> Count;
+     if (auto *Ins = CtxProfAnalysis::getBBInstrumentation(
+             const_cast<BasicBlock &>(BB))) {
+       auto Index = Ins->getIndex()->getZExtValue();
+       assert(Index < Counters.size() &&
+              "The index must be inside the counters vector by construction - "
+              "tripping this assertion indicates a bug in how the contextual "
+              "profile is managed by IPO transforms");
+       // Reuse Index so it is not an assert-only (unused in NDEBUG) variable.
+       Count = Counters[Index];
+     }
+     auto [It, Ins] =
+         BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}});
+     (void)Ins;
+     assert(Ins && "We iterate through the function's BBs, no reason to "
+                   "insert one more than once");
+     NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) {
+       return !shouldExcludeEdge(BB, *Succ);
+     });
+   }
+   // Pre-allocate the vector, we want references to its contents to be stable.
+   EdgeInfos.reserve(NrEdges);
+   for (const auto &BB : F) {
+     auto &Info = getBBInfo(BB);
+     for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) {
+       const auto *Succ = BB.getTerminator()->getSuccessor(I);
+       if (!shouldExcludeEdge(BB, *Succ)) {
+         auto &EI = EdgeInfos.emplace_back(Info, getBBInfo(*Succ));
+         Info.addOutEdge(I, &EI);
+         getBBInfo(*Succ).addInEdge(&EI);
+       }
+     }
+   }
+   // reserve() only guarantees capacity() >= NrEdges, so compare the size
+   // instead: never growing past the reserved amount means the vector never
+   // reallocated.
+   assert(EdgeInfos.size() == NrEdges &&
+          "EdgeInfos should hold exactly the number of edges reserved for it, "
+          "because we need pointers to its contents to be stable");
+ }
+
+ /// Assign branch weights and function entry count. Also update the PSI
+ /// builder.
+ ///
+ /// Counter values are propagated over the CFG first. Counters[0] becomes the
+ /// function entry count. Each terminator with at least two successors is then
+ /// annotated with its out-edge counts, and each such edge count is reported
+ /// to the summary builder as an internal count. A terminator whose out-edges
+ /// all have a 0 count triggers an error and is left unannotated.
+ void assignProfileData() {
+   assert(!Counters.empty());
+   propagateCounterValues(Counters);
+   F.setEntryCount(Counters[0]);
+   PB.addEntryCount(Counters[0]);
+
+   for (auto &BB : F) {
+     if (succ_size(&BB) < 2)
+       continue;
+     auto *Term = BB.getTerminator();
+     SmallVector<uint64_t, 2> EdgeCounts(Term->getNumSuccessors(), 0);
+     uint64_t MaxCount = 0;
+     const auto &BBInfo = getBBInfo(BB);
+     for (size_t SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;
+          ++SuccIdx) {
+       uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
+       if (EdgeCount > MaxCount)
+         MaxCount = EdgeCount;
+       EdgeCounts[SuccIdx] = EdgeCount;
+       PB.addInternalCount(EdgeCount);
+     }
+
+     if (MaxCount == 0) {
+       F.getContext().emitError(
+           "[ctx-prof] Encountered a BB with more than one successor, where "
+           "all outgoing edges have a 0 count. This occurs in non-exiting "
+           "functions (message pumps, usually) which are not supported in the "
+           "contextual profiling case");
+       // emitError may return (e.g. under a custom diagnostic handler); do not
+       // go on to annotate with an all-zero profile.
+       // NOTE(review): presumes setProfMetadata requires MaxCount > 0 - confirm.
+       continue;
+     }
+     setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
+   }
+ }
+};
+
+/// Returns true when the dominator tree computed for \p F reports every basic
+/// block of \p F as reachable from the entry block.
+bool areAllBBsReachable(const Function &F, FunctionAnalysisManager &FAM) {
+  // getResult is called with a non-const Function, hence the const_cast.
+  auto &DomTree =
+      FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(F));
+  for (const BasicBlock &Block : F)
+    if (!DomTree.isReachableFromEntry(&Block))
+      return false;
+  return true;
+}
+
+/// Clears the MD_prof metadata on the terminator of every basic block in \p F
+/// and sets the function's entry count to 0.
+void clearColdFunctionProfile(Function &F) {
+  for (auto &Block : F) {
+    auto *Terminator = Block.getTerminator();
+    Terminator->setMetadata(LLVMContext::MD_prof, nullptr);
+  }
+  F.setEntryCount(0U);
+}
+
+/// Erases every instruction in \p F that is an InstrProfCntrInstBase.
+void removeInstrumentation(Function &F) {
+  for (auto &Block : F) {
+    // The early-increment range lets us erase the instruction being visited.
+    for (auto &Inst : llvm::make_early_inc_range(Block)) {
+      if (!isa<InstrProfCntrInstBase>(Inst))
+        continue;
+      Inst.eraseFromParent();
+    }
+  }
+}
+
+} // namespace
+
+/// Flattens the contextual profile of \p M and annotates each defined function
+/// with it: functions absent from the flattened profile get their profile
+/// cleared, the others get an entry count and branch weights. A module-level
+/// profile summary is then installed and the cached ProfileSummaryInfo is
+/// refreshed. Instrumentation is removed from every function on all exit
+/// paths.
+///
+/// \returns PreservedAnalyses::none() in every case, since the IR may have been
+/// modified even when there is no contextual profile.
+PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
+                                                ModuleAnalysisManager &MAM) {
+  // Ensure in all cases the instrumentation is removed: if this module had no
+  // roots, the contextual profile would evaluate to false, but there would
+  // still be instrumentation.
+  // Note: in such cases we leave as-is any other profile info (if present -
+  // e.g. synthetic weights, etc) because it wouldn't interfere with the
+  // contextual - based one (which would be in other modules)
+  auto OnExit = llvm::make_scope_exit([&]() {
+    for (auto &F : M)
+      removeInstrumentation(F);
+  });
+  auto &CtxProf = MAM.getResult<CtxProfAnalysis>(M);
+  // The scope-exit handler above still erases instrumentation on this path,
+  // so claiming that all analyses are preserved would be wrong.
+  if (!CtxProf)
+    return PreservedAnalyses::none();
+
+  const auto FlattenedProfile = CtxProf.flatten();
+  // Loop-invariant: fetch the function analysis manager once.
+  auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+  InstrProfSummaryBuilder PB(ProfileSummaryBuilder::DefaultCutoffs);
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+
+    if (!areAllBBsReachable(F, FAM)) {
+      M.getContext().emitError(
+          "[ctx-prof] Function has unreacheable basic blocks: " + F.getName());
+      continue;
+    }
+
+    const auto &FlatProfile =
+        FlattenedProfile.lookup(AssignGUIDPass::getGUID(F));
+    // If this function didn't appear in the contextual profile, it's cold.
+    if (FlatProfile.empty())
+      clearColdFunctionProfile(F);
+    else {
+      ProfileAnnotator S(F, FlatProfile, PB);
+      S.assignProfileData();
+    }
+  }
+
+  auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
+
+  M.setProfileSummary(PB.getSummary()->getMD(M.getContext()),
+                      ProfileSummary::Kind::PSK_Instr);
+  PSI.refresh();
+  return PreservedAnalyses::none();
+}
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll
new file mode 100644
index 00000000000000..c1c9cfa5a4f471
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll
@@ -0,0 +1,12 @@
+; RUN: opt -passes=ctx-prof-flatten %s -S | FileCheck %s
+
+declare void @bar()
+
+define void @foo() {
+ call void @llvm.instrprof.increment(ptr @foo, i64 123, i32 1, i32 0)
+ call void @llvm.instrprof.callsite(ptr @foo, i64 123, i32 1, i32 0, ptr @bar)
+ call void @bar()
+ ret void
+}
+
+; CHECK-NOT: call void @llvm.instrprof
\ No newline at end of file
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
new file mode 100644
index 00000000000000..b7950b26a3ef27
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
@@ -0,0 +1,112 @@
+; REQUIRES: x86_64-linux
+;
+; RUN: rm -rf %t
+; RUN: split-file %s %t
+; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
+; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN: %t/example.ll -S -o %t/prelink.ll
+; RUN: FileCheck --input-file %t/prelink.ll %s --check-prefix=PRELINK
+; RUN: opt -passes='ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata %t/prelink.ll -S | FileCheck %s
+;
+;
+; Check that instrumentation occurs where expected: the "no" block for both foo and
+; @an_entrypoint - which explains the subsequent branch weights
+;
+; PRELINK-LABEL: @foo
+; PRELINK-LABEL: yes:
+; PRELINK-LABEL: no:
+; PRELINK-NEXT: call void @llvm.instrprof.increment(ptr @foo, i64 [[#]], i32 2, i32 1)
+
+; PRELINK-LABEL: @an_entrypoint
+; PRELINK-LABEL: yes:
+; PRELINK-NEXT: call void @llvm.instrprof.increment(ptr @an_entrypoint, i64 [[#]], i32 2, i32 1)
+; PRELINK-NOT: "ProfileSummary"
+
+; Check that the output has:
+; - no instrumentation
+; - the 2 functions have an entry count
+; - each conditional branch has profile annotation
+;
+; CHECK-NOT: call void @llvm.instrprof
+;
+; make sure we have function entry counts, branch weights, and a profile summary.
+; CHECK-LABEL: @foo
+; CHECK-SAME: !prof ![[FOO_EP:[0-9]+]]
+; CHECK: br i1 %t, label %yes, label %no, !prof ![[FOO_BW:[0-9]+]]
+; CHECK-LABEL: @an_entrypoint
+; CHECK-SAME: !prof ![[AN_ENTRYPOINT_EP:[0-9]+]]
+; CHECK: br i1 %t, label %yes, label %common.ret, !prof ![[AN_ENTRYPOINT_BW:[0-9]+]]
+
+
+; CHECK: ![[#]] = !{i32 1, !"ProfileSummary", !1}
+; CHECK: ![[#]] = !{!"TotalCount", i64 480}
+; CHECK: ![[#]] = !{!"MaxCount", i64 140}
+; CHECK: ![[#]] = !{!"MaxInternalCount", i64 125}
+; CHECK: ![[#]] = !{!"MaxFunctionCount", i64 140}
+; CHECK: ![[#]] = !{!"NumCounts", i64 6}
+; CHECK: ![[#]] = !{!"NumFunctions", i64 2}
+;
+; @foo will be called both unconditionally and conditionally, on the "yes" branch
+; which has a count of 40. So 140 times.
+
+; CHECK: ![[FOO_EP]] = !{!"function_entry_count", i64 140}
+
+; foo's "no" branch is taken 10+5 times (from the 2 contexts belonging to foo).
+; Which means its "yes" branch is taken 140 - 15 times.
+
+; CHECK: ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15}
+; CHECK: ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100}
+; CHECK: ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60}
+
+;--- profile.json
+[
+ {
+ "Guid": 4909520559318251808,
+ "Counters": [100, 40],
+ "Callsites": [
+ [
+ {
+ "Guid": 11872291593386833696,
+ "Counters": [ 100, 5 ]
+ }
+ ],
+ [
+ {
+ "Guid": 11872291593386833696,
+ "Counters": [ 40, 10 ]
+ }
+ ]
+ ]
+ }
+]
+;--- example.ll
+declare void @bar()
+
+define void @foo(i32 %a, ptr %fct) #0 !guid !0 {
+ %t = icmp sgt i32 %a, 7
+ br i1 %t, label %yes, label %no
+yes:
+ call void %fct(i32 %a)
+ br label %exit
+no:
+ call void @bar()
+ br label %exit
+exit:
+ ret void
+}
+
+define void @an_entrypoint(i32 %a) !guid !1 {
+ %t = icmp sgt i32 %a, 0
+ call void @foo(i32 10, ptr null)
+ br i1 %t, label %yes, label %no
+
+yes:
+ call void @foo(i32 1, ptr null)
+ ret void
+no:
+ ret void
+}
+
+attributes #0 = { noinline }
+!0 = !{ i64 11872291593386833696 }
+!1 = !{i64 4909520559318251808}
More information about the llvm-branch-commits
mailing list