[llvm] 775c507 - [ctx_prof] Flattened profile lowering pass (#107329)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 6 13:47:11 PDT 2024


Author: Mircea Trofin
Date: 2024-09-06T13:47:08-07:00
New Revision: 775c50709c2d18acc085c089392847c5968457b9

URL: https://github.com/llvm/llvm-project/commit/775c50709c2d18acc085c089392847c5968457b9
DIFF: https://github.com/llvm/llvm-project/commit/775c50709c2d18acc085c089392847c5968457b9.diff

LOG: [ctx_prof] Flattened profile lowering pass (#107329)

Pass to flatten and lower the contextual profile to profile (i.e. `MD_prof`) metadata. This is expected to be used after all IPO transformations have happened.

Prior to lowering, the instrumentation is maintained during IPO and the contextual profile is kept in sync (see PRs #105469, #106154). Flattening (#104539) sums up, index by index, the counters belonging to all of a function's context nodes.

We first propagate counter values (from the flattened profile) using the same propagation algorithm as `PGOUseFunc::populateCounters`, then map the edge values to `branch_weights`. Functions in the module that don't have an entry in the flattened profile are deemed cold, and any `MD_prof` metadata they may have is reset. The profile summary is also reset at this point.

Issue [#89287](https://github.com/llvm/llvm-project/issues/89287)

Added: 
    llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
    llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
    llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll
    llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll

Modified: 
    llvm/include/llvm/ProfileData/ProfileCommon.h
    llvm/lib/Passes/PassBuilder.cpp
    llvm/lib/Passes/PassBuilderPipelines.cpp
    llvm/lib/Passes/PassRegistry.def
    llvm/lib/Transforms/Instrumentation/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/ProfileCommon.h b/llvm/include/llvm/ProfileData/ProfileCommon.h
index eaab59484c947a..edd8e1f644ad12 100644
--- a/llvm/include/llvm/ProfileData/ProfileCommon.h
+++ b/llvm/include/llvm/ProfileData/ProfileCommon.h
@@ -79,13 +79,13 @@ class ProfileSummaryBuilder {
 class InstrProfSummaryBuilder final : public ProfileSummaryBuilder {
   uint64_t MaxInternalBlockCount = 0;
 
-  inline void addEntryCount(uint64_t Count);
-  inline void addInternalCount(uint64_t Count);
-
 public:
   InstrProfSummaryBuilder(std::vector<uint32_t> Cutoffs)
       : ProfileSummaryBuilder(std::move(Cutoffs)) {}
 
+  void addEntryCount(uint64_t Count);
+  void addInternalCount(uint64_t Count);
+
   void addRecord(const InstrProfRecord &);
   std::unique_ptr<ProfileSummary> getSummary();
 };

diff  --git a/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
new file mode 100644
index 00000000000000..0eab3aaf6fcad3
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h
@@ -0,0 +1,25 @@
+//===-- PGOCtxProfFlattening.h - Contextual Instr. Flattening ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PGOCtxProfFlattening class.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFFLATTENING_H
+#define LLVM_TRANSFORMS_INSTRUMENTATION_PGOCTXPROFFLATTENING_H
+
+#include "llvm/IR/PassManager.h"
+namespace llvm {
+
+/// Module pass that flattens the contextual profile and lowers it to `MD_prof`
+/// metadata. It also removes the contextual instrumentation from the module.
+class PGOCtxProfFlatteningPass
+    : public PassInfoMixin<PGOCtxProfFlatteningPass> {
+public:
+  explicit PGOCtxProfFlatteningPass() = default;
+  /// Runs the flattening and lowering over \p M.
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
+};
+} // namespace llvm
+#endif

diff  --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 7c0acc0745eba3..1500087bbb7118 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -198,6 +198,7 @@
 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
 #include "llvm/Transforms/Instrumentation/NumericalStabilitySanitizer.h"
+#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
 #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
 #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"

diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 1fd7ef929c87d5..38297dc02b8be6 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -76,6 +76,7 @@
 #include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
 #include "llvm/Transforms/Instrumentation/InstrProfiling.h"
 #include "llvm/Transforms/Instrumentation/MemProfiler.h"
+#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
 #include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
 #include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
 #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"

diff  --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index a1324e81705669..9e5ea2f6be522f 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -58,6 +58,7 @@ MODULE_PASS("coro-early", CoroEarlyPass())
 MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
 MODULE_PASS("ctx-instr-gen",
             PGOInstrumentationGen(PGOInstrumentationType::CTXPROF))
+MODULE_PASS("ctx-prof-flatten", PGOCtxProfFlatteningPass())
 MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
 MODULE_PASS("debugify", NewPMDebugifyPass())
 MODULE_PASS("dfsan", DataFlowSanitizerPass())

diff  --git a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
index deab37801ff1df..d45b07447d09da 100644
--- a/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
+++ b/llvm/lib/Transforms/Instrumentation/CMakeLists.txt
@@ -15,6 +15,7 @@ add_llvm_component_library(LLVMInstrumentation
   InstrProfiling.cpp
   KCFI.cpp
   LowerAllowCheckPass.cpp
+  PGOCtxProfFlattening.cpp
   PGOCtxProfLowering.cpp
   PGOForceFunctionAttrs.cpp
   PGOInstrumentation.cpp

diff  --git a/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
new file mode 100644
index 00000000000000..d4224135f771b7
--- /dev/null
+++ b/llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp
@@ -0,0 +1,350 @@
+//===- PGOCtxProfFlattening.cpp - Contextual Instr. Flattening ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Flattens the contextual profile and lowers it to MD_prof.
+// This should happen after all IPO (which is assumed to have maintained the
+// contextual profile) happened. Flattening consists of summing the values at
+// the same index of the counters belonging to all the contexts of a function.
+// The lowering consists of materializing the counter values to function
+// entrypoint counts and branch probabilities.
+//
+// This pass also removes contextual instrumentation, which has been kept around
+// to facilitate its functionality.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Instrumentation/PGOCtxProfFlattening.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/Analysis/CtxProfAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/IR/Analysis.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfileSummary.h"
+#include "llvm/ProfileData/ProfileCommon.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Scalar/DCE.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Propagates the flattened counter values of a function over its CFG and
+/// lowers them to a function entry count and branch weights.
+class ProfileAnnotator final {
+  class BBInfo;
+  /// An edge between two BBs. Count stays unset until it becomes known.
+  struct EdgeInfo {
+    BBInfo *const Src;
+    BBInfo *const Dest;
+    std::optional<uint64_t> Count;
+
+    explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}
+  };
+
+  /// Per-BB bookkeeping: the BB's count (if known), its in and out edges, and
+  /// how many of those edges still have an unknown count.
+  class BBInfo {
+    std::optional<uint64_t> Count;
+    SmallVector<EdgeInfo *> OutEdges;
+    SmallVector<EdgeInfo *> InEdges;
+    size_t UnknownCountOutEdges = 0;
+    size_t UnknownCountInEdges = 0;
+
+    // Pass AssumeAllKnown when we try to propagate counts from edges to BBs -
+    // because all the edge counters must be known. Otherwise, edges with an
+    // unknown count contribute 0. Null entries (excluded edges) are skipped.
+    uint64_t getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
+                        bool AssumeAllKnown) const {
+      uint64_t Sum = 0;
+      for (const auto *E : Edges)
+        if (E)
+          Sum += AssumeAllKnown ? *E->Count : E->Count.value_or(0U);
+      return Sum;
+    }
+
+    // Set this BB's (so far unknown) count to the sum of the counts of Edges,
+    // all of which must be known.
+    void computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
+      assert(!Count.has_value());
+      Count = getEdgeSum(Edges, true);
+    }
+
+    // Give the one edge in Edges that has no count the value that makes the
+    // edge counts add up to this BB's count (or 0, if the known edges already
+    // exceed it), then update the unknown-edge tallies of the edge's endpoints.
+    void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
+      uint64_t KnownSum = getEdgeSum(Edges, false);
+      uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U;
+      EdgeInfo *E = nullptr;
+      for (auto *I : Edges)
+        if (I && !I->Count.has_value()) {
+          // Check before overwriting E, so that a second unknown edge is
+          // actually detected when assertions are enabled.
+          assert(!E && "Expected exactly one edge to have an unknown count, "
+                       "found a second one");
+          E = I;
+#ifdef NDEBUG
+          // Nothing left to verify without assertions: stop at the first hit.
+          break;
+#endif
+        }
+      assert(E && "Expected exactly one edge to have an unknown count");
+      assert(!E->Count.has_value());
+      E->Count = EdgeVal;
+      assert(E->Src->UnknownCountOutEdges > 0);
+      assert(E->Dest->UnknownCountInEdges > 0);
+      --E->Src->UnknownCountOutEdges;
+      --E->Dest->UnknownCountInEdges;
+    }
+
+  public:
+    BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional<uint64_t> Count)
+        : Count(Count) {
+      // For in edges, we just want to pre-allocate enough space, since we know
+      // it at this stage. For out edges, we will insert edges at the indices
+      // corresponding to positions in this BB's terminator instruction, so we
+      // construct a default (nullptr values)-initialized vector. A nullptr edge
+      // corresponds to those that are excluded (see shouldExcludeEdge).
+      InEdges.reserve(NumInEdges);
+      OutEdges.resize(NumOutEdges);
+    }
+
+    // If BB has successors and no out edge count is unknown, compute this BB's
+    // count from the out edges. Returns true if the count was set.
+    bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) {
+      if (!succ_empty(&BB) && !UnknownCountOutEdges) {
+        computeCountFrom(OutEdges);
+        return true;
+      }
+      return false;
+    }
+
+    // If BB is not the entry block and no in edge count is unknown, compute
+    // this BB's count from the in edges. Returns true if the count was set.
+    bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) {
+      if (!BB.isEntryBlock() && !UnknownCountInEdges) {
+        computeCountFrom(InEdges);
+        return true;
+      }
+      return false;
+    }
+
+    // Register an incoming edge, whose count is not known yet.
+    void addInEdge(EdgeInfo &Info) {
+      InEdges.push_back(&Info);
+      ++UnknownCountInEdges;
+    }
+
+    // For the out edges, we care about the position we place them in, which is
+    // the position in terminator instruction's list (at construction). Later,
+    // we build branch_weights metadata with edge frequency values matching
+    // these positions.
+    void addOutEdge(size_t Index, EdgeInfo &Info) {
+      OutEdges[Index] = &Info;
+      ++UnknownCountOutEdges;
+    }
+
+    bool hasCount() const { return Count.has_value(); }
+
+    // If exactly one in edge has an unknown count, compute it. Expects this
+    // BB's count to be known. Returns true if an edge count was set.
+    bool trySetSingleUnknownInEdgeCount() {
+      if (UnknownCountInEdges == 1) {
+        setSingleUnknownEdgeCount(InEdges);
+        return true;
+      }
+      return false;
+    }
+
+    // If exactly one out edge has an unknown count, compute it. Expects this
+    // BB's count to be known. Returns true if an edge count was set.
+    bool trySetSingleUnknownOutEdgeCount() {
+      if (UnknownCountOutEdges == 1) {
+        setSingleUnknownEdgeCount(OutEdges);
+        return true;
+      }
+      return false;
+    }
+    size_t getNumOutEdges() const { return OutEdges.size(); }
+
+    // The count of the out edge at position Index, which must be known by now;
+    // excluded (nullptr) edges report 0.
+    uint64_t getEdgeCount(size_t Index) const {
+      if (auto *E = OutEdges[Index])
+        return *E->Count;
+      return 0U;
+    }
+  };
+
+  Function &F;
+  const SmallVectorImpl<uint64_t> &Counters;
+  // To be accessed through getBBInfo() after construction.
+  std::map<const BasicBlock *, BBInfo> BBInfos;
+  std::vector<EdgeInfo> EdgeInfos;
+  InstrProfSummaryBuilder &PB;
+
+  // This is an adaptation of PGOUseFunc::populateCounters.
+  // FIXME(mtrofin): look into factoring the code to share one implementation.
+  // Repeatedly sweeps the BBs (in reverse function order) until a sweep makes
+  // no progress. A BB without a count takes it from its out or in edges once
+  // all of them are known; a BB with a count resolves a single remaining
+  // unknown out or in edge. The Counters parameter is not read here: the
+  // instrumented BBs received their counts in the constructor.
+  void propagateCounterValues(const SmallVectorImpl<uint64_t> &Counters) {
+    bool KeepGoing = true;
+    while (KeepGoing) {
+      KeepGoing = false;
+      for (const auto &BB : reverse(F)) {
+        auto &Info = getBBInfo(BB);
+        if (!Info.hasCount())
+          KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) ||
+                       Info.tryTakeCountFromKnownInEdges(BB);
+        if (Info.hasCount()) {
+          KeepGoing |= Info.trySetSingleUnknownOutEdgeCount();
+          KeepGoing |= Info.trySetSingleUnknownInEdgeCount();
+        }
+      }
+    }
+  }
+  // The only criteria for exclusion is faux suspend -> exit edges in presplit
+  // coroutines. The API serves for readability, currently.
+  bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const {
+    return llvm::isPresplitCoroSuspendExitEdge(Src, Dest);
+  }
+
+  // BB must be a block of F: every block of F gets an entry at construction.
+  BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; }
+
+public:
+  /// Builds the BB and edge bookkeeping for \p F. BBs carrying instrumentation
+  /// start out with the corresponding value from \p Counters; all the other BB
+  /// counts, and all the edge counts, start out unknown.
+  ProfileAnnotator(Function &F, const SmallVectorImpl<uint64_t> &Counters,
+                   InstrProfSummaryBuilder &PB)
+      : F(F), Counters(Counters), PB(PB) {
+    assert(!F.isDeclaration());
+    assert(!Counters.empty());
+    size_t NrEdges = 0;
+    for (const auto &BB : F) {
+      std::optional<uint64_t> Count;
+      if (auto *Ins = CtxProfAnalysis::getBBInstrumentation(
+              const_cast<BasicBlock &>(BB))) {
+        auto Index = Ins->getIndex()->getZExtValue();
+        assert(Index < Counters.size() &&
+               "The index must be inside the counters vector by construction - "
+               "tripping this assertion indicates a bug in how the contextual "
+               "profile is managed by IPO transforms");
+        // Reuse Index, which also keeps it used when assertions are disabled.
+        Count = Counters[Index];
+      }
+      auto [It, Ins] =
+          BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}});
+      (void)Ins;
+      assert(Ins && "We iterate through the function's BBs, no reason to "
+                    "insert one more than once");
+      NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) {
+        return !shouldExcludeEdge(BB, *Succ);
+      });
+    }
+    // Pre-allocate the vector, we want references to its contents to be stable.
+    EdgeInfos.reserve(NrEdges);
+    for (const auto &BB : F) {
+      auto &Info = getBBInfo(BB);
+      for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) {
+        const auto *Succ = BB.getTerminator()->getSuccessor(I);
+        if (!shouldExcludeEdge(BB, *Succ)) {
+          auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ));
+          Info.addOutEdge(I, EI);
+          getBBInfo(*Succ).addInEdge(EI);
+        }
+      }
+    }
+    assert(EdgeInfos.capacity() == NrEdges &&
+           "The capacity of EdgeInfos should have stayed unchanged it was "
+           "populated, because we need pointers to its contents to be stable");
+  }
+
+  /// Assign branch weights and function entry count. Also update the PSI
+  /// builder.
+  void assignProfileData() {
+    assert(!Counters.empty());
+    propagateCounterValues(Counters);
+    F.setEntryCount(Counters[0]);
+    PB.addEntryCount(Counters[0]);
+
+    for (auto &BB : F) {
+      // Only terminators with at least 2 successors get branch weights.
+      if (succ_size(&BB) < 2)
+        continue;
+      auto *Term = BB.getTerminator();
+      SmallVector<uint64_t, 2> EdgeCounts(Term->getNumSuccessors(), 0);
+      uint64_t MaxCount = 0;
+      const auto &BBInfo = getBBInfo(BB);
+      for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;
+           ++SuccIdx) {
+        uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
+        if (EdgeCount > MaxCount)
+          MaxCount = EdgeCount;
+        EdgeCounts[SuccIdx] = EdgeCount;
+        PB.addInternalCount(EdgeCount);
+      }
+
+      if (MaxCount == 0)
+        F.getContext().emitError(
+            "[ctx-prof] Encountered a BB with more than one successor, where "
+            "all outgoing edges have a 0 count. This occurs in non-exiting "
+            "functions (message pumps, usually) which are not supported in the "
+            "contextual profiling case");
+      setProfMetadata(F.getParent(), Term, EdgeCounts, MaxCount);
+    }
+  }
+};
+
+/// Returns true if every basic block of \p F is reachable from its entry
+/// block, according to the dominator tree obtained from \p FAM.
+/// Only referenced from an assert, hence [[maybe_unused]]: without it, builds
+/// with assertions disabled would see an unused function.
+[[maybe_unused]] bool areAllBBsReachable(const Function &F,
+                                         FunctionAnalysisManager &FAM) {
+  auto &DT = FAM.getResult<DominatorTreeAnalysis>(const_cast<Function &>(F));
+  return llvm::all_of(
+      F, [&](const BasicBlock &BB) { return DT.isReachableFromEntry(&BB); });
+}
+
+/// Removes the `MD_prof` metadata from the terminator of each basic block of
+/// \p F, then sets the entry count of \p F to 0.
+void clearColdFunctionProfile(Function &F) {
+  for (auto &Block : F) {
+    auto *Terminator = Block.getTerminator();
+    Terminator->setMetadata(LLVMContext::MD_prof, nullptr);
+  }
+  F.setEntryCount(0U);
+}
+
+/// Erases from \p F every instruction that is an InstrProfCntrInstBase.
+void removeInstrumentation(Function &F) {
+  for (auto &Block : F) {
+    // Early-increment iteration, because we erase while walking the block.
+    for (auto &Inst : llvm::make_early_inc_range(Block)) {
+      if (!isa<InstrProfCntrInstBase>(Inst))
+        continue;
+      Inst.eraseFromParent();
+    }
+  }
+}
+
+} // namespace
+
+/// Lowers the flattened contextual profile onto \p M. Each defined function
+/// found in the flattened profile gets an entry count and branch weights;
+/// the others get their profile cleared. The module's profile summary is then
+/// replaced. Instrumentation is removed from \p M on every exit path.
+PreservedAnalyses PGOCtxProfFlatteningPass::run(Module &M,
+                                                ModuleAnalysisManager &MAM) {
+  // Ensure in all cases the instrumentation is removed: if this module had no
+  // roots, the contextual profile would evaluate to false, but there would
+  // still be instrumentation.
+  // Note: in such cases we leave as-is any other profile info (if present -
+  // e.g. synthetic weights, etc) because it wouldn't interfere with the
+  // contextual - based one (which would be in other modules)
+  auto OnExit = llvm::make_scope_exit([&]() {
+    for (auto &F : M)
+      removeInstrumentation(F);
+  });
+  auto &CtxProf = MAM.getResult<CtxProfAnalysis>(M);
+  // OnExit erases instrumentation on this early exit, too, so the IR may have
+  // been modified: we must not claim that all analyses are preserved.
+  if (!CtxProf)
+    return PreservedAnalyses::none();
+
+  const auto FlattenedProfile = CtxProf.flatten();
+
+  InstrProfSummaryBuilder PB(ProfileSummaryBuilder::DefaultCutoffs);
+  for (auto &F : M) {
+    if (F.isDeclaration())
+      continue;
+
+    assert(areAllBBsReachable(
+               F, MAM.getResult<FunctionAnalysisManagerModuleProxy>(M)
+                      .getManager()) &&
+           "Function has unreacheable basic blocks. The expectation was that "
+           "DCE was run before.");
+
+    const auto &FlatProfile =
+        FlattenedProfile.lookup(AssignGUIDPass::getGUID(F));
+    // If this function didn't appear in the contextual profile, it's cold.
+    if (FlatProfile.empty())
+      clearColdFunctionProfile(F);
+    else {
+      ProfileAnnotator S(F, FlatProfile, PB);
+      S.assignProfileData();
+    }
+  }
+
+  auto &PSI = MAM.getResult<ProfileSummaryAnalysis>(M);
+
+  // Install the summary built from the counts gathered above, then have PSI
+  // pick it up.
+  M.setProfileSummary(PB.getSummary()->getMD(M.getContext()),
+                      ProfileSummary::Kind::PSK_Instr);
+  PSI.refresh();
+  return PreservedAnalyses::none();
+}

diff  --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll
new file mode 100644
index 00000000000000..c1c9cfa5a4f471
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-always-removes-instrumentation.ll
@@ -0,0 +1,12 @@
+; RUN: opt -passes=ctx-prof-flatten %s -S | FileCheck %s
+
+declare void @bar()
+
+define void @foo() {
+  call void @llvm.instrprof.increment(ptr @foo, i64 123, i32 1, i32 0)
+  call void @llvm.instrprof.callsite(ptr @foo, i64 123, i32 1, i32 0, ptr @bar)
+  call void @bar()
+  ret void
+}
+
+; CHECK-NOT: call void @llvm.instrprof
\ No newline at end of file

diff  --git a/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
new file mode 100644
index 00000000000000..b7950b26a3ef27
--- /dev/null
+++ b/llvm/test/Analysis/CtxProfAnalysis/flatten-and-annotate.ll
@@ -0,0 +1,112 @@
+; REQUIRES: x86_64-linux
+;
+; RUN: rm -rf %t
+; RUN: split-file %s %t
+; RUN: llvm-ctxprof-util fromJSON --input=%t/profile.json --output=%t/profile.ctxprofdata
+; RUN: opt -module-summary -passes='thinlto-pre-link<O2>' -use-ctx-profile=%t/profile.ctxprofdata \
+; RUN:   %t/example.ll -S -o %t/prelink.ll
+; RUN: FileCheck --input-file %t/prelink.ll %s --check-prefix=PRELINK
+; RUN: opt -passes='ctx-prof-flatten' -use-ctx-profile=%t/profile.ctxprofdata %t/prelink.ll -S  | FileCheck %s
+;
+;
+; Check that instrumentation occurs where expected: the "no" block for both foo and
+; @an_entrypoint - which explains the subsequent branch weights
+;
+; PRELINK-LABEL: @foo
+; PRELINK-LABEL: yes:
+; PRELINK-LABEL: no:
+; PRELINK-NEXT:     call void @llvm.instrprof.increment(ptr @foo, i64 [[#]], i32 2, i32 1)
+
+; PRELINK-LABEL: @an_entrypoint
+; PRELINK-LABEL: yes:
+; PRELINK-NEXT:    call void @llvm.instrprof.increment(ptr @an_entrypoint, i64 [[#]], i32 2, i32 1)
+; PRELINK-NOT: "ProfileSummary"
+
+; Check that the output has:
+;  - no instrumentation
+;  - the 2 functions have an entry count
+;  - each conditional branch has profile annotation
+;
+; CHECK-NOT:   call void @llvm.instrprof
+;
+; make sure we have function entry counts, branch weights, and a profile summary.
+; CHECK-LABEL: @foo
+; CHECK-SAME:    !prof ![[FOO_EP:[0-9]+]]
+; CHECK:          br i1 %t, label %yes, label %no, !prof ![[FOO_BW:[0-9]+]]
+; CHECK-LABEL: @an_entrypoint
+; CHECK-SAME:    !prof ![[AN_ENTRYPOINT_EP:[0-9]+]]
+; CHECK:          br i1 %t, label %yes, label %common.ret, !prof ![[AN_ENTRYPOINT_BW:[0-9]+]]
+
+
+; CHECK:      ![[#]] = !{i32 1, !"ProfileSummary", !1}
+; CHECK:      ![[#]] = !{!"TotalCount", i64 480}
+; CHECK:      ![[#]] = !{!"MaxCount", i64 140}
+; CHECK:      ![[#]] = !{!"MaxInternalCount", i64 125}
+; CHECK:      ![[#]] = !{!"MaxFunctionCount", i64 140}
+; CHECK:      ![[#]] = !{!"NumCounts", i64 6}
+; CHECK:      ![[#]] = !{!"NumFunctions", i64 2}
+;
+; @foo will be called both unconditionally and conditionally, on the "yes" branch
+; which has a count of 40. So 140 times.
+
+; CHECK:       ![[FOO_EP]] = !{!"function_entry_count", i64 140} 
+
+; foo's "no" branch is taken 10+5 times (from the 2 contexts belonging to foo).
+; Which means its "yes" branch is taken 140 - 15 times.
+
+; CHECK:       ![[FOO_BW]] = !{!"branch_weights", i32 125, i32 15} 
+; CHECK:       ![[AN_ENTRYPOINT_EP]] = !{!"function_entry_count", i64 100}
+; CHECK:       ![[AN_ENTRYPOINT_BW]] = !{!"branch_weights", i32 40, i32 60} 
+
+;--- profile.json
+[
+  {
+    "Guid": 4909520559318251808,
+    "Counters": [100, 40],
+    "Callsites": [
+      [
+        {
+          "Guid": 11872291593386833696,
+          "Counters": [ 100, 5 ]
+        }
+      ],
+      [
+        {
+          "Guid": 11872291593386833696,
+          "Counters": [ 40, 10 ]
+        }
+      ]
+    ]
+  }
+]
+;--- example.ll
+declare void @bar()
+
+define void @foo(i32 %a, ptr %fct) #0 !guid !0 {
+  %t = icmp sgt i32 %a, 7
+  br i1 %t, label %yes, label %no
+yes:
+  call void %fct(i32 %a)
+  br label %exit
+no:
+  call void @bar()
+  br label %exit
+exit:
+  ret void
+}
+
+define void @an_entrypoint(i32 %a) !guid !1 {
+  %t = icmp sgt i32 %a, 0
+  call void @foo(i32 10, ptr null)
+  br i1 %t, label %yes, label %no
+
+yes:
+  call void @foo(i32 1, ptr null)
+  ret void
+no:
+  ret void
+}
+
+attributes #0 = { noinline }
+!0 = !{ i64 11872291593386833696 }
+!1 = !{i64 4909520559318251808}


        


More information about the llvm-commits mailing list