[llvm-branch-commits] [llvm] [ctx_prof] Flattened profile lowering pass (PR #107329)

Mircea Trofin via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Sep 6 11:21:54 PDT 2024


https://github.com/mtrofin updated https://github.com/llvm/llvm-project/pull/107329

>From 22e94e4f30c0b3f4c895e789961bff03db745980 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin at google.com>
Date: Tue, 3 Sep 2024 21:28:05 -0700
Subject: [PATCH] [ctx_prof] Flattened profile lowering pass

---
 llvm/include/llvm/ProfileData/ProfileCommon.h |   6 +-
 .../Instrumentation/PGOCtxProfFlattening.h    |  25 ++
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 llvm/lib/Passes/PassBuilderPipelines.cpp      |   1 +
 llvm/lib/Passes/PassRegistry.def              |   1 +
 .../Transforms/Instrumentation/CMakeLists.txt |   1 +
 .../Instrumentation/PGOCtxProfFlattening.cpp  | 350 ++++++++++++++++++
 .../flatten-always-removes-instrumentation.ll |  12 +
 .../CtxProfAnalysis/flatten-and-annotate.ll   | 112 ++++++
 9 files changed, 506 insertions(+), 3 deletions(-)
 create mode 100644 llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
 create mode 100644 llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
 create mode 100644 llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll
 create mode 100644 llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll

diff --git a/llvm/include/llvm/ProfileData/ProfileCommon.h b/llvm/include/llvm/ProfileData/ProfileCommon.h
index eaab59484c947a..edd8e1f644ad12 100644
--- a/llvm/include/llvm/ProfileData/ProfileCommon.h
+++ b/llvm/include/llvm/ProfileData/ProfileCommon.h
@@ -79,13 +79,13 @@ class ProfileSummaryBuilder {
 class InstrProfSummaryBuilder final : public ProfileSummaryBuilder {
   uint64_t MaxInternalBlockCount = 0;
 
-  inline void addEntryCount(uint64_t Count);
-  inline void addInternalCount(uint64_t Count);
-
 public:
   InstrProfSummaryBuilder(std::vector<uint32_t> Cutoffs)
       : ProfileSummaryBuilder(std::move(Cutoffs)) {}
 
+  void addEntryCount(uint64_t Count);
+  void addInternalCount(uint64_t Count);
+
   void addRecord(const InstrProfRecord &);
   std::unique_ptr<ProfileSummary> getSummary();
 };
diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
new file mode 100644
index 00000000000000..0eab3aaf6fcad3
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
@@ -0,0 +1,25 @@
+//===-- PGOCtxProfFlattening.h - Contextual Instr. Flattening ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PGOCtxProfFlattening class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFFLATTENING_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFFLATTENING_H
+
+#include "llvm/IR/PassManager.h"
+namespace llvm {
+
+class PGOCtxProfFlatteningPass
+    : public PassInfoMixin<PGOCtxProfFlatteningPass> {
+public:
+  explicit PGOCtxProfFlatteningPass() = default;
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+};
+} // namespace llvm
+#endif
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index a22abed8051a11..d87e64eff08966 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -198,6 +198,7 @@
 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
 #include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
+#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
 #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
 #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 1fd7ef929c87d5..38297dc02b8be6 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -76,6 +76,7 @@
 #include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
+#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
 #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
 #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index d6067089c6b5c1..2b0624cb9874da 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -58,6 +58,7 @@ MODULE_PASS("coro-early", CoroEarlyPass())
 MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
 MODULE_PASS("ctx-instr-gen",
             PGOInstrumentationGen(PGOInstrumentationType::CTXPROF))
+MODULE_PASS("ctx-prof-flatten", PGOCtxProfFlatteningPass())
 MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
 MODULE_PASS("debugify", NewPMDebugifyPass())
 MODULE_PASS("dfsan", DataFlowSanitizerPass())
diff --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index deab37801ff1df..d45b07447d09da 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_component_library(LLVMInstrumentation
   InstrProfiling.cpp
   KCFI.cpp
   LowerAllowCheckPass.cpp
+  PGOCtxProfFlattening.cpp
   PGOCtxProfLowering.cpp
   PGOForceFunctionAttrs.cpp
   PGOInstrumentation.cpp
diff --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
new file mode 100644
index 00000000000000..d4224135f771b7
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -0,0 +1,350 @@
+//===- PGOCtxProfFlattening.cpp - Contextual Instr. Flattening ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Flattens the contextual profile and lowers it to MD_prof.
+// This should happen after all IPO (which is assumed to have maintained the
+// contextual profile) happened. Flattening consists of summing the values at
+// the same index of the counters belonging to all the contexts of a function.
+// The lowering consists of materializing the counter values to function
+// entrypoint counts and branch probabilities.
+//
+// This pass also removes contextual instrumentation, which has been kept around
+// to facilitate its functionality.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfileSummary.h"
+#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Scalar/DCE.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+namespace {
+
+class ProfileAnnotator final {
+  class BBInfo;
+  struct EdgeInfo {
+    BBInfo *const Src;
+    BBInfo *const Dest;
+    std::optional<uint64_t> Count;
+
+    explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}
+  };
+
+  class BBInfo {
+    std::optional<uint64_t> Count;
+    SmallVector<EdgeInfo *> OutEdges;
+    SmallVector<EdgeInfo *> InEdges;
+    size_t UnknownCountOutEdges = 0;
+    size_t UnknownCountInEdges = 0;
+
+    // Pass AssumeAllKnown when we try to propagate counts from edges to BBs -
+    // because all the edge counters must be known.
+    uint64_t getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
+                        bool AssumeAllKnown) const {
+      uint64_t Sum = 0;
+      for (const auto *E : Edges)
+        if (E)
+          Sum += AssumeAllKnown ? *E->Count : E->Count.value_or(0U);
+      return Sum;
+    }
+
+    void computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
+      assert(!Count.has_value());
+      Count = getEdgeSum(Edges, true);
+    }
+
+    void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
+      uint64_t KnownSum = getEdgeSum(Edges, false);
+      uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U;
+      EdgeInfo *E = nullptr;
+      for (auto *I : Edges)
+        if (I && !I->Count.has_value()) {
+          E = I;
+#ifdef NDEBUG
+          break;
+#else
+          assert((!E || E == I) &&
+                 "Expected exactly one edge to have an unknown count, "
+                 "found a second one");
+          continue;
+#endif
+        }
+      assert(E && "Expected exactly one edge to have an unknown count");
+      assert(!E->Count.has_value());
+      E->Count = EdgeVal;
+      assert(E->Src->UnknownCountOutEdges > 0);
+      assert(E->Dest->UnknownCountInEdges > 0);
+      --E->Src->UnknownCountOutEdges;
+      --E->Dest->UnknownCountInEdges;
+    }
+
+  public:
+    BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional<uint64_t> Count)
+        : Count(Count) {
+      // For in edges, we just want to pre-allocate enough space, since we know
+      // it at this stage. For out edges, we will insert edges at the indices
+      // corresponding to positions in this BB's terminator instruction, so we
+      // construct a default (nullptr values)-initialized vector. A nullptr edge
+      // corresponds to those that are excluded (see shouldExcludeEdge).
+      InEdges.reserve(NumInEdges);
+      OutEdges.resize(NumOutEdges);
+    }
+
+    bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) {
+      if (!succ_empty(&BB) && !UnknownCountOutEdges) {
+        computeCountFrom(OutEdges);
+        return true;
+      }
+      return false;
+    }
+
+    bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) {
+      if (!BB.isEntryBlock() && !UnknownCountInEdges) {
+        computeCountFrom(InEdges);
+        return true;
+      }
+      return false;
+    }
+
+    void addInEdge(EdgeInfo &Info) {
+      InEdges.push_back(&Info);
+      ++UnknownCountInEdges;
+    }
+
+    // For the out edges, we care about the position we place them in, which is
+    // the position in terminator instruction's list (at construction). Later,
+    // we build branch_weights metadata with edge frequency values matching
+    // these positions.
+    void addOutEdge(size_t Index, EdgeInfo &Info) {
+      OutEdges[Index] = &Info;
+      ++UnknownCountOutEdges;
+    }
+
+    bool hasCount() const { return Count.has_value(); }
+
+    bool trySetSingleUnknownInEdgeCount() {
+      if (UnknownCountInEdges == 1) {
+        setSingleUnknownEdgeCount(InEdges);
+        return true;
+      }
+      return false;
+    }
+
+    bool trySetSingleUnknownOutEdgeCount() {
+      if (UnknownCountOutEdges == 1) {
+        setSingleUnknownEdgeCount(OutEdges);
+        return true;
+      }
+      return false;
+    }
+    size_t getNumOutEdges() const { return OutEdges.size(); }
+
+    uint64_t getEdgeCount(size_t Index) const {
+      if (auto *E = OutEdges[Index])
+        return *E->Count;
+      return 0U;
+    }
+  };
+
+  Function &F;
+  const SmallVectorImpl<uint64_t> &Counters;
+  // To be accessed through getBBInfo() after construction.
+  std::map<const BasicBlock *, BBInfo> BBInfos;
+  std::vector<EdgeInfo> EdgeInfos;
+  InstrProfSummaryBuilder &PB;
+
+  // This is an adaptation of PGOUseFunc::populateCounters.
+  // FIXME(mtrofin): look into factoring the code to share one implementation.
+  void propagateCounterValues(const SmallVectorImpl<uint64_t> &Counters) {
+    bool KeepGoing = true;
+    while (KeepGoing) {
+      KeepGoing = false;
+      for (const auto &BB : reverse(F)) {
+        auto &Info = getBBInfo(BB);
+        if (!Info.hasCount())
+          KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) ||
+                       Info.tryTakeCountFromKnownInEdges(BB);
+        if (Info.hasCount()) {
+          KeepGoing |= Info.trySetSingleUnknownOutEdgeCount();
+          KeepGoing |= Info.trySetSingleUnknownInEdgeCount();
+        }
+      }
+    }
+  }
+  // The only criteria for exclusion is faux suspend -> exit edges in presplit
+  // coroutines. The API serves for readability, currently.
+  bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const {
+    return llvm::isPresplitCoroSuspendExitEdge(Src, Dest);
+  }
+
+  BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; }
+
+public:
+  ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters,
+                   InstrProfSummaryBuilder &PB)
+      : F(F), Counters(Counters), PB(PB) {
+    assert(!F.isDeclaration());
+    assert(!Counters.empty());
+    size_t NrEdges = 0;
+    for (const auto &BB : F) {
+      std::optional<uint64_t> Count;
+      if (auto *Ins = CtxProfAnalysis::getBBInstrumentation(
+              const_cast<BasicBlock &>(BB))) {
+        auto Index = Ins->getIndex()->getZExtValue();
+        assert(Index < Counters.size() &&
+               "The index must be inside the counters vector by construction - "
+               "tripping this assertion indicates a bug in how the contextual "
+               "profile is managed by IPO transforms");
+        Count = Counters[Ins->getIndex()->getZExtValue()];
+      }
+      auto [It, Ins] =
+          BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}});
+      (void)Ins;
+      assert(Ins && "We iterate through the function's BBs, no reason to "
+                    "insert one more than once");
+      NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) {
+        return !shouldExcludeEdge(BB, *Succ);
+      });
+    }
+    // Pre-allocate the vector, we want references to its contents to be stable.
+    EdgeInfos.reserve(NrEdges);
+    for (const auto &BB : F) {
+      auto &Info = getBBInfo(BB);
+      for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) {
+        const auto *Succ = BB.getTerminator()->getSuccessor(I);
+        if (!shouldExcludeEdge(BB, *Succ)) {
+          auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ));
+          Info.addOutEdge(I, EI);
+          getBBInfo(*Succ).addInEdge(EI);
+        }
+      }
+    }
+    assert(EdgeInfos.capacity() == NrEdges &&
+           "The capacity of EdgeInfos should have stayed unchanged it was "
+           "populated, because we need pointers to its contents to be stable");
+  }
+
+  /// Assign branch weights and function entry count. Also update the PSI
+  /// builder.
+  void assignProfileData() {
+    assert(!Counters.empty());
+    propagateCounterValues(Counters);
+    F.setEntryCount(Counters[0]);
+    PB.addEntryCount(Counters[0]);
+
+    for (auto &BB : F) {
+      if (succ_size(&BB) < 2)
+        continue;
+      auto *Term = BB.getTerminator();
+      SmallVector<uint64_t, 2> EdgeCounts(Term->getNumSuccessors(), 0);
+      uint64_t MaxCount = 0;
+      const auto &BBInfo = getBBInfo(BB);
+      for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;
+           ++SuccIdx) {
+        uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
+        if (EdgeCount > MaxCount)
+          MaxCount = EdgeCount;
+        EdgeCounts[SuccIdx] = EdgeCount;
+        PB.addInternalCount(EdgeCount);
+      }
+
+      if (MaxCount == 0)
+        F.getContext().emitError(
+            "[ctx-prof] Encountered a BB with more than one successor, where "
+            "all outgoing edges have a 0 count. This occurs in non-exiting "
+            "functions (message pumps, usually) which are not supported in the "
+            "contextual profiling case");
+      setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
+    }
+  }
+};
+
+bool areAllBBsReachable(const Function &F, FunctionAnalysisManager &FAM) {
+  auto &DT = FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(F));
+  return llvm::all_of(
+      F, [&](const BasicBlock &BB) { return DT.isReachableFromEntry(&BB); });
+}
+
+void clearColdFunctionProfile(Function &F) {
+  for (auto &BB : F)
+    BB.getTerminator()->setMetadata(LLVMContext::MD_prof, nullptr);
+  F.setEntryCount(0U);
+}
+
+void removeInstrumentation(Function &F) {
+  for (auto &BB : F)
+    for (auto &I : llvm::make_early_inc_range(BB))
+      if (isa<InstrProfCntrInstBase>(I))
+        I.eraseFromParent();
+}
+
+} // namespace
+
+PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
+                                                ModuleAnalysisManager &MAM) {
+  // Ensure in all cases the instrumentation is removed: if this module had no
+  // roots, the contextual profile would evaluate to false, but there would
+  // still be instrumentation.
+  // Note: in such cases we leave as-is any other profile info (if present -
+  // e.g. synthetic weights, etc) because it wouldn't interfere with the
+  // contextual - based one (which would be in other modules)
+  auto OnExit = llvm::make_scope_exit([&]() {
+    for (auto &F : M)
+      removeInstrumentation(F);
+  });
+  auto &CtxProf = MAM.getResult<CtxProfAnalysis>(M);
+  if (!CtxProf)
+    return PreservedAnalyses::all();
+
+  const auto FlattenedProfile = CtxProf.flatten();
+
+  InstrProfSummaryBuilder PB(ProfileSummaryBuilder::DefaultCutoffs);
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+
+    assert(areAllBBsReachable(
+               F, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M)
+                      .getManager()) &&
+           "Function has unreacheable basic blocks. The expectation was that "
+           "DCE was run before.");
+
+    const auto &FlatProfile =
+        FlattenedProfile.lookup(AssignGUIDPass::getGUID(F));
+    // If this function didn't appear in the contextual profile, it's cold.
+    if (FlatProfile.empty())
+      clearColdFunctionProfile(F);
+    else {
+      ProfileAnnotator S(F, FlatProfile, PB);
+      S.assignProfileData();
+    }
+  }
+
+  auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
+
+  M.setProfileSummary(PB.getSummary()->getMD(M.getContext()),
+                      ProfileSummary::Kind::PSK_Instr);
+  PSI.refresh();
+  return PreservedAnalyses::none();
+}
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll
new file mode 100644
index 00000000000000..c1c9cfa5a4f471
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll
@@ -0,0 +1,12 @@
+; RUN: opt -passes=ctx-prof-flatten %s -S | FileCheck %s
+
+declare void @bar()
+
+define void @foo() {
+  call void @llvm.instrprof.increment(ptr @foo, i64 123, i32 1, i32 0)
+  call void @llvm.instrprof.callsite(ptr @foo, i64 123, i32 1, i32 0, ptr @bar)
+  call void @bar()
+  ret void
+}
+
+; CHECK-NOT: call void @llvm.instrprof
\ No newline at end of file
diff --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
new file mode 100644
index 00000000000000..b7950b26a3ef27
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
@@ -0,0 +1,112 @@
+; REQUIRES: x86_64-linux
+;
+; RUN: rm -rf %t
+; RUN: split-file %s %t
+; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
+; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN:   %t/example.ll -S -o %t/prelink.ll
+; RUN: FileCheck --input-file %t/prelink.ll %s --check-prefix=PRELINK
+; RUN: opt -passes='ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata %t/prelink.ll -S  | FileCheck %s
+;
+;
+; Check that instrumentation occurs where expected: the "no" block for both foo and
+; @an_entrypoint - which explains the subsequent branch weights
+;
+; PRELINK-LABEL: @foo
+; PRELINK-LABEL: yes:
+; PRELINK-LABEL: no:
+; PRELINK-NEXT:     call void @llvm.instrprof.increment(ptr @foo, i64 [[#]], i32 2, i32 1)
+
+; PRELINK-LABEL: @an_entrypoint
+; PRELINK-LABEL: yes:
+; PRELINK-NEXT:    call void @llvm.instrprof.increment(ptr @an_entrypoint, i64 [[#]], i32 2, i32 1)
+; PRELINK-NOT: "ProfileSummary"
+
+; Check that the output has:
+;  - no instrumentation
+;  - the 2 functions have an entry count
+;  - each conditional branch has profile annotation
+;
+; CHECK-NOT:   call void @llvm.instrprof
+;
+; make sure we have function entry counts, branch weights, and a profile summary.
+; CHECK-LABEL: @foo
+; CHECK-SAME:    !prof ![[FOO_EP:[0-9]+]]
+; CHECK:          br i1 %t, label %yes, label %no, !prof ![[FOO_BW:[0-9]+]]
+; CHECK-LABEL: @an_entrypoint
+; CHECK-SAME:    !prof ![[AN_ENTRYPOINT_EP:[0-9]+]]
+; CHECK:          br i1 %t, label %yes, label %common.ret, !prof ![[AN_ENTRYPOINT_BW:[0-9]+]]
+
+
+; CHECK:      ![[#]] = !{i32 1, !"ProfileSummary", !1}
+; CHECK:      ![[#]] = !{!"TotalCount", i64 480}
+; CHECK:      ![[#]] = !{!"MaxCount", i64 140}
+; CHECK:      ![[#]] = !{!"MaxInternalCount", i64 125}
+; CHECK:      ![[#]] = !{!"MaxFunctionCount", i64 140}
+; CHECK:      ![[#]] = !{!"NumCounts", i64 6}
+; CHECK:      ![[#]] = !{!"NumFunctions", i64 2}
+;
+; @foo will be called both unconditionally and conditionally, on the "yes" branch
+; which has a count of 40. So 140 times.
+
+; CHECK:       ![[FOO_EP]] = !{!"function_entry_count", i64 140} 
+
+; foo's "no" branch is taken 10+5 times (from the 2 contexts belonging to foo).
+; Which means its "yes" branch is taken 140 - 15 times.
+
+; CHECK:       ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15} 
+; CHECK:       ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100}
+; CHECK:       ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60} 
+
+;--- profile.json
+[
+  {
+    "Guid": 4909520559318251808,
+    "Counters": [100, 40],
+    "Callsites": [
+      [
+        {
+          "Guid": 11872291593386833696,
+          "Counters": [ 100, 5 ]
+        }
+      ],
+      [
+        {
+          "Guid": 11872291593386833696,
+          "Counters": [ 40, 10 ]
+        }
+      ]
+    ]
+  }
+]
+;--- example.ll
+declare void @bar()
+
+define void @foo(i32 %a, ptr %fct) #0 !guid !0 {
+  %t = icmp sgt i32 %a, 7
+  br i1 %t, label %yes, label %no
+yes:
+  call void %fct(i32 %a)
+  br label %exit
+no:
+  call void @bar()
+  br label %exit
+exit:
+  ret void
+}
+
+define void @an_entrypoint(i32 %a) !guid !1 {
+  %t = icmp sgt i32 %a, 0
+  call void @foo(i32 10, ptr null)
+  br i1 %t, label %yes, label %no
+
+yes:
+  call void @foo(i32 1, ptr null)
+  ret void
+no:
+  ret void
+}
+
+attributes #0 = { noinline }
+!0 = !{ i64 11872291593386833696 }
+!1 = !{i64 4909520559318251808}



More information about the llvm-branch-commits mailing list