[compiler-rt] [llvm] [PGO] Add option to always instrument loop entries (PR #116789)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 27 02:18:13 PST 2024


https://github.com/ronryvchin updated https://github.com/llvm/llvm-project/pull/116789

>From b0210fa7e763b2e8e424f22658e65aa306e99f45 Mon Sep 17 00:00:00 2001
From: Ron Ryvchin <ron.ryvchin at nextsilicon.com>
Date: Mon, 25 Nov 2024 22:40:51 +0200
Subject: [PATCH] [PGO] Supporting code for always instrumenting loop entries

This patch extends the PGO infrastructure with an option to prefer the instrumentation of loop entry blocks.
This option is a generalization of https://github.com/llvm/llvm-project/commit/19fb5b467bb97f95eace1f3637d2d1041cebd3ce,
and helps to cover cases where the loop exit is never executed.
One example where this can occur is an event-handling loop.

Note that this change does NOT alter the default behavior.
---
 compiler-rt/include/profile/InstrProfData.inc |   5 +-
 llvm/include/llvm/ProfileData/InstrProf.h     |   4 +-
 .../llvm/ProfileData/InstrProfData.inc        |   5 +-
 .../llvm/ProfileData/InstrProfReader.h        |  21 ++++
 .../llvm/ProfileData/InstrProfWriter.h        |   4 +-
 .../llvm/Transforms/Instrumentation/CFGMST.h  |  35 +++++-
 llvm/lib/ProfileData/InstrProfReader.cpp      |   5 +
 llvm/lib/ProfileData/InstrProfWriter.cpp      |   7 ++
 .../Instrumentation/GCOVProfiling.cpp         |   3 +-
 .../Instrumentation/PGOInstrumentation.cpp    |  72 +++++++----
 .../Transforms/PGOProfile/loop_entries_gen.ll |  58 +++++++++
 .../Transforms/PGOProfile/loop_entries_use.ll | 115 ++++++++++++++++++
 llvm/tools/llvm-profdata/llvm-profdata.cpp    |   4 +-
 13 files changed, 308 insertions(+), 30 deletions(-)
 create mode 100644 llvm/test/Transforms/PGOProfile/loop_entries_gen.ll
 create mode 100644 llvm/test/Transforms/PGOProfile/loop_entries_use.ll

diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index c66b0465a0b548..39613da81ecb48 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -730,10 +730,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 #define INSTR_PROF_COVMAP_VERSION 6
 
 /* Profile version is always of type uint64_t. Reserve the upper 32 bits in the
- * version for other variants of profile. We set the 8th most significant bit 
+ * version for other variants of profile. We set the 8th most significant bit
  * (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation
  * generated profile, and 0 if this is a Clang FE generated profile.
  * 1 in bit 57 indicates there are context-sensitive records in the profile.
+ * The 55th bit indicates whether to always instrument loop entry blocks.
+ * The 58th bit indicates whether to always instrument function entry blocks.
  * The 59th bit indicates whether to use debug info to correlate profiles.
  * The 60th bit indicates single byte coverage instrumentation.
  * The 61st bit indicates function entry instrumentation only.
@@ -742,6 +744,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
  */
 #define VARIANT_MASKS_ALL 0xffffffff00000000ULL
 #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
+#define VARIANT_MASK_INSTR_LOOP_ENTRIES (0x1ULL << 55)
 #define VARIANT_MASK_IR_PROF (0x1ULL << 56)
 #define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
 #define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58)
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index c5f7800097807d..7133c0c6a302c7 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -344,7 +344,9 @@ enum class InstrProfKind {
   MemProf = 0x40,
   // A temporal profile.
   TemporalProfile = 0x80,
-  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/TemporalProfile)
+  // A profile with loop entry basic blocks instrumentation.
+  LoopEntriesInstrumentation = 0x100,
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/LoopEntriesInstrumentation)
 };
 
 const std::error_category &instrprof_category();
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index c66b0465a0b548..39613da81ecb48 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -730,10 +730,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 #define INSTR_PROF_COVMAP_VERSION 6
 
 /* Profile version is always of type uint64_t. Reserve the upper 32 bits in the
- * version for other variants of profile. We set the 8th most significant bit 
+ * version for other variants of profile. We set the 8th most significant bit
  * (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation
  * generated profile, and 0 if this is a Clang FE generated profile.
  * 1 in bit 57 indicates there are context-sensitive records in the profile.
+ * The 55th bit indicates whether to always instrument loop entry blocks.
+ * The 58th bit indicates whether to always instrument function entry blocks.
  * The 59th bit indicates whether to use debug info to correlate profiles.
  * The 60th bit indicates single byte coverage instrumentation.
  * The 61st bit indicates function entry instrumentation only.
@@ -742,6 +744,7 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
  */
 #define VARIANT_MASKS_ALL 0xffffffff00000000ULL
 #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
+#define VARIANT_MASK_INSTR_LOOP_ENTRIES (0x1ULL << 55)
 #define VARIANT_MASK_IR_PROF (0x1ULL << 56)
 #define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
 #define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58)
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 058b9a1ce02e0b..f4795c06f394f4 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -123,6 +123,9 @@ class InstrProfReader {
 
   virtual bool instrEntryBBEnabled() const = 0;
 
+  /// Return true if the profile instruments all loop entries.
+  virtual bool instrLoopEntriesEnabled() const = 0;
+
   /// Return true if the profile has single byte counters representing coverage.
   virtual bool hasSingleByteCoverage() const = 0;
 
@@ -274,6 +277,11 @@ class TextInstrProfReader : public InstrProfReader {
                              InstrProfKind::FunctionEntryInstrumentation);
   }
 
+  bool instrLoopEntriesEnabled() const override {
+    return static_cast<bool>(ProfileKind &
+                             InstrProfKind::LoopEntriesInstrumentation);
+  }
+
   bool hasSingleByteCoverage() const override {
     return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
   }
@@ -398,6 +406,10 @@ class RawInstrProfReader : public InstrProfReader {
     return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
   }
 
+  bool instrLoopEntriesEnabled() const override {
+    return (Version & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0;
+  }
+
   bool hasSingleByteCoverage() const override {
     return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0;
   }
@@ -564,6 +576,7 @@ struct InstrProfReaderIndexBase {
   virtual bool isIRLevelProfile() const = 0;
   virtual bool hasCSIRLevelProfile() const = 0;
   virtual bool instrEntryBBEnabled() const = 0;
+  virtual bool instrLoopEntriesEnabled() const = 0;
   virtual bool hasSingleByteCoverage() const = 0;
   virtual bool functionEntryOnly() const = 0;
   virtual bool hasMemoryProfile() const = 0;
@@ -628,6 +641,10 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase {
     return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
   }
 
+  bool instrLoopEntriesEnabled() const override {
+    return (FormatVersion & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0;
+  }
+
   bool hasSingleByteCoverage() const override {
     return (FormatVersion & VARIANT_MASK_BYTE_COVERAGE) != 0;
   }
@@ -753,6 +770,10 @@ class IndexedInstrProfReader : public InstrProfReader {
     return Index->instrEntryBBEnabled();
   }
 
+  bool instrLoopEntriesEnabled() const override {
+    return Index->instrLoopEntriesEnabled();
+  }
+
   bool hasSingleByteCoverage() const override {
     return Index->hasSingleByteCoverage();
   }
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index fa30926c662587..fdb51c4ab42182 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -190,7 +190,9 @@ class InstrProfWriter {
       return make_error<InstrProfError>(instrprof_error::unsupported_version);
     }
     if (testIncompatible(InstrProfKind::FunctionEntryOnly,
-                         InstrProfKind::FunctionEntryInstrumentation)) {
+                         InstrProfKind::FunctionEntryInstrumentation) ||
+        testIncompatible(InstrProfKind::FunctionEntryOnly,
+                         InstrProfKind::LoopEntriesInstrumentation)) {
       return make_error<InstrProfError>(
           instrprof_error::unsupported_version,
           "cannot merge FunctionEntryOnly profiles and BB profiles together");
diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
index 35b3d615e3844a..99a52a68a9e98b 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
@@ -19,6 +19,7 @@
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/BranchProbability.h"
@@ -52,10 +53,14 @@ template <class Edge, class BBInfo> class CFGMST {
 
   BranchProbabilityInfo *const BPI;
   BlockFrequencyInfo *const BFI;
+  LoopInfo *const LI;
 
   // If function entry will be always instrumented.
   const bool InstrumentFuncEntry;
 
+  // If true, loop entries will always be instrumented.
+  const bool InstrumentLoopEntries;
+
   // Find the root group of the G and compress the path from G to the root.
   BBInfo *findAndCompressGroup(BBInfo *G) {
     if (G->Group != G)
@@ -154,6 +159,11 @@ template <class Edge, class BBInfo> class CFGMST {
           }
           if (BPI != nullptr)
             Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
+          // If InstrumentLoopEntries is on and TargetBB is a loop header (i.e.,
+          // the current edge leads to a loop), set Weight to be minimal, so
+          // that the edge won't be chosen for the MST and will be instrumented.
+          if (InstrumentLoopEntries && LI->isLoopHeader(TargetBB))
+            Weight = 0;
           if (Weight == 0)
             Weight++;
           auto *E = &addEdge(&BB, TargetBB, Weight);
@@ -252,6 +262,18 @@ template <class Edge, class BBInfo> class CFGMST {
     }
   }
 
+  [[maybe_unused]] bool validateLoopEntryInstrumentation() {
+    if (!InstrumentLoopEntries)
+      return true;
+    for (auto &Ei : AllEdges) {
+      if (Ei->Removed)
+        continue;
+      if (Ei->DestBB && LI->isLoopHeader(Ei->DestBB) && Ei->InMST)
+        return false;
+    }
+    return true;
+  }
+
 public:
   // Dump the Debug information about the instrumentation.
   void dumpEdges(raw_ostream &OS, const Twine &Message) const {
@@ -291,13 +313,20 @@ template <class Edge, class BBInfo> class CFGMST {
     return *AllEdges.back();
   }
 
-  CFGMST(Function &Func, bool InstrumentFuncEntry,
+  CFGMST(Function &Func, bool InstrumentFuncEntry, bool InstrumentLoopEntries,
          BranchProbabilityInfo *BPI = nullptr,
-         BlockFrequencyInfo *BFI = nullptr)
-      : F(Func), BPI(BPI), BFI(BFI), InstrumentFuncEntry(InstrumentFuncEntry) {
+         BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr)
+      : F(Func), BPI(BPI), BFI(BFI), LI(LI),
+        InstrumentFuncEntry(InstrumentFuncEntry),
+        InstrumentLoopEntries(InstrumentLoopEntries) {
+    assert(!(InstrumentLoopEntries && !LI) &&
+           "expected a LoopInfo to instrumenting loop entries");
     buildEdges();
     sortEdgesByWeight();
     computeMinimumSpanningTree();
+    assert(validateLoopEntryInstrumentation() &&
+           "Loop entries should not be in MST when "
+           "InstrumentLoopEntries is on");
     if (AllEdges.size() > 1 && InstrumentFuncEntry)
       std::iter_swap(std::move(AllEdges.begin()),
                      std::move(AllEdges.begin() + AllEdges.size() - 1));
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index cd2c5d72ea5222..7296fddac36176 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -53,6 +53,9 @@ static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
   if (Version & VARIANT_MASK_INSTR_ENTRY) {
     ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
   }
+  if (Version & VARIANT_MASK_INSTR_LOOP_ENTRIES) {
+    ProfileKind |= InstrProfKind::LoopEntriesInstrumentation;
+  }
   if (Version & VARIANT_MASK_BYTE_COVERAGE) {
     ProfileKind |= InstrProfKind::SingleByteCoverage;
   }
@@ -263,6 +266,8 @@ Error TextInstrProfReader::readHeader() {
       ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
     else if (Str.equals_insensitive("not_entry_first"))
       ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation;
+    else if (Str.equals_insensitive("instrument_loop_entries"))
+      ProfileKind |= InstrProfKind::LoopEntriesInstrumentation;
     else if (Str.equals_insensitive("single_byte_coverage"))
       ProfileKind |= InstrProfKind::SingleByteCoverage;
     else if (Str.equals_insensitive("temporal_prof_traces")) {
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 87a538f35c7865..f1ac5830fcd385 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -909,6 +909,9 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   if (static_cast<bool>(ProfileKind &
                         InstrProfKind::FunctionEntryInstrumentation))
     Header.Version |= VARIANT_MASK_INSTR_ENTRY;
+  if (static_cast<bool>(ProfileKind &
+                        InstrProfKind::LoopEntriesInstrumentation))
+    Header.Version |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
   if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
     Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
   if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))
@@ -1152,6 +1155,10 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
   if (static_cast<bool>(ProfileKind &
                         InstrProfKind::FunctionEntryInstrumentation))
     OS << "# Always instrument the function entry block\n:entry_first\n";
+  if (static_cast<bool>(ProfileKind &
+                        InstrProfKind::LoopEntriesInstrumentation))
+    OS << "# Always instrument the loop entry "
+          "blocks\n:instrument_loop_entries\n";
   if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
     OS << "# Instrument block coverage\n:single_byte_coverage\n";
   InstrProfSymtab Symtab;
diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 2ea89be40a3d46..f9be7f933d31e4 100644
--- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -820,7 +820,8 @@ bool GCOVProfiler::emitProfileNotes(
       SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
                                    BFI);
 
-      CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI);
+      CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry=*/false,
+                               /*InstrumentLoopEntries=*/false, BPI, BFI);
 
       // getInstrBB can split basic blocks and push elements to AllEdges.
       for (size_t I : llvm::seq<size_t>(0, MST.numEdges())) {
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 4d8141431a0c19..471086ce3a7511 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -259,6 +259,11 @@ static cl::opt<bool> PGOInstrumentEntry(
     "pgo-instrument-entry", cl::init(false), cl::Hidden,
     cl::desc("Force to instrument function entry basicblock."));
 
+static cl::opt<bool>
+    PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false),
+                             cl::Hidden,
+                             cl::desc("Force to instrument loop entries."));
+
 static cl::opt<bool> PGOFunctionEntryCoverage(
     "pgo-function-entry-coverage", cl::Hidden,
     cl::desc(
@@ -359,6 +364,7 @@ class FunctionInstrumenter final {
   std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
   BranchProbabilityInfo *const BPI;
   BlockFrequencyInfo *const BFI;
+  LoopInfo *const LI;
 
   const PGOInstrumentationType InstrumentationType;
 
@@ -376,14 +382,17 @@ class FunctionInstrumenter final {
            InstrumentationType == PGOInstrumentationType::CTXPROF;
   }
 
+  bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; }
+
 public:
   FunctionInstrumenter(
       Module &M, Function &F, TargetLibraryInfo &TLI,
       std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
       BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr,
+      LoopInfo *LI = nullptr,
       PGOInstrumentationType InstrumentationType = PGOInstrumentationType::FDO)
       : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
-        InstrumentationType(InstrumentationType) {}
+        LI(LI), InstrumentationType(InstrumentationType) {}
 
   void instrument();
 };
@@ -439,6 +448,8 @@ createIRLevelProfileFlagVar(Module &M,
   if (PGOInstrumentEntry ||
       InstrumentationType == PGOInstrumentationType::CTXPROF)
     ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
+  if (PGOInstrumentLoopEntries)
+    ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
   if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
     ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
   if (PGOFunctionEntryCoverage)
@@ -625,12 +636,13 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
       Function &Func, TargetLibraryInfo &TLI,
       std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
       bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
-      BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
-      bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
+      BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr,
+      bool IsCS = false, bool InstrumentFuncEntry = true,
+      bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false)
       : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
         TLI(TLI), ValueSites(IPVK_Last + 1),
         SIVisitor(Func, HasSingleByteCoverage),
-        MST(F, InstrumentFuncEntry, BPI, BFI),
+        MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI),
         BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
     if (BCI && PGOViewBlockCoverageGraph)
       BCI->viewBlockCoverageGraph();
@@ -916,9 +928,10 @@ void FunctionInstrumenter::instrument() {
 
   const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF;
   FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
-      F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI,
+      F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI,
       InstrumentationType == PGOInstrumentationType::CSFDO,
-      shouldInstrumentEntryBB(), PGOBlockCoverage);
+      shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(),
+      PGOBlockCoverage);
 
   auto *const Name = IsCtxProf ? cast<GlobalValue>(&F) : FuncInfo.FuncNameVar;
   auto *const CFGHash =
@@ -1136,11 +1149,13 @@ class PGOUseFunc {
   PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
              std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
              BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
-             ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
+             LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS,
+             bool InstrumentFuncEntry, bool InstrumentLoopEntries,
              bool HasSingleByteCoverage)
       : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
-        FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
-                 InstrumentFuncEntry, HasSingleByteCoverage),
+        FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS,
+                 InstrumentFuncEntry, InstrumentLoopEntries,
+                 HasSingleByteCoverage),
         FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
 
   void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
@@ -1923,6 +1938,7 @@ static bool InstrumentAllFunctions(
     Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
     function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
+    function_ref<LoopInfo *(Function &)> LookupLI,
     PGOInstrumentationType InstrumentationType) {
   // For the context-sensitve instrumentation, we should have a separated pass
   // (before LTO/ThinLTO linking) to create these variables.
@@ -1943,10 +1959,11 @@ static bool InstrumentAllFunctions(
   for (auto &F : M) {
     if (skipPGOGen(F))
       continue;
-    auto &TLI = LookupTLI(F);
-    auto *BPI = LookupBPI(F);
-    auto *BFI = LookupBFI(F);
-    FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI,
+    TargetLibraryInfo &TLI = LookupTLI(F);
+    BranchProbabilityInfo *BPI = LookupBPI(F);
+    BlockFrequencyInfo *BFI = LookupBFI(F);
+    LoopInfo *LI = LookupLI(F);
+    FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI,
                             InstrumentationType);
     FI.instrument();
   }
@@ -1980,8 +1997,11 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
   auto LookupBFI = [&FAM](Function &F) {
     return &FAM.getResult<BlockFrequencyAnalysis>(F);
   };
+  auto LookupLI = [&FAM](Function &F) {
+    return &FAM.getResult<LoopAnalysis>(F);
+  };
 
-  if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI,
+  if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI,
                               InstrumentationType))
     return PreservedAnalyses::all();
 
@@ -2116,7 +2136,8 @@ static bool annotateAllFunctions(
     function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
     function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
-    ProfileSummaryInfo *PSI, bool IsCS) {
+    function_ref<LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI,
+    bool IsCS) {
   LLVM_DEBUG(dbgs() << "Read in profile counters: ");
   auto &Ctx = M.getContext();
   // Read the counter array from file.
@@ -2181,22 +2202,27 @@ static bool annotateAllFunctions(
   bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
   if (PGOInstrumentEntry.getNumOccurrences() > 0)
     InstrumentFuncEntry = PGOInstrumentEntry;
+  bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled();
+  if (PGOInstrumentLoopEntries.getNumOccurrences() > 0)
+    InstrumentLoopEntries = PGOInstrumentLoopEntries;
 
   bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
   for (auto &F : M) {
     if (skipPGOUse(F))
       continue;
-    auto &TLI = LookupTLI(F);
-    auto *BPI = LookupBPI(F);
-    auto *BFI = LookupBFI(F);
+    TargetLibraryInfo &TLI = LookupTLI(F);
+    BranchProbabilityInfo *BPI = LookupBPI(F);
+    BlockFrequencyInfo *BFI = LookupBFI(F);
+    LoopInfo *LI = LookupLI(F);
     if (!HasSingleByteCoverage) {
       // Split indirectbr critical edges here before computing the MST rather
       // than later in getInstrBB() to avoid invalidating it.
       SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
                                    BFI);
     }
-    PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
-                    InstrumentFuncEntry, HasSingleByteCoverage);
+    PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS,
+                    InstrumentFuncEntry, InstrumentLoopEntries,
+                    HasSingleByteCoverage);
     if (HasSingleByteCoverage) {
       Func.populateCoverage(PGOReader.get());
       continue;
@@ -2335,10 +2361,14 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
   auto LookupBFI = [&FAM](Function &F) {
     return &FAM.getResult<BlockFrequencyAnalysis>(F);
   };
+  auto LookupLI = [&FAM](Function &F) {
+    return &FAM.getResult<LoopAnalysis>(F);
+  };
 
   auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
   if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
-                            LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
+                            LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI,
+                            IsCS))
     return PreservedAnalyses::all();
 
   return PreservedAnalyses::none();
diff --git a/llvm/test/Transforms/PGOProfile/loop_entries_gen.ll b/llvm/test/Transforms/PGOProfile/loop_entries_gen.ll
new file mode 100644
index 00000000000000..457ef6b3c31a59
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/loop_entries_gen.ll
@@ -0,0 +1,58 @@
+; RUN: opt %s -passes=pgo-instr-gen -pgo-instrument-loop-entries=false -S | FileCheck %s --check-prefixes=CHECK,NOTLOOPENTRIES --implicit-check-not=@llvm.instrprof.increment
+; RUN: opt %s -passes=pgo-instr-gen -pgo-instrument-loop-entries=true -S | FileCheck %s --check-prefixes=CHECK,LOOPENTRIES --implicit-check-not=@llvm.instrprof.increment
+; RUN: opt %s -passes=pgo-instr-gen -pgo-instrument-entry=true -S | FileCheck %s --check-prefixes=CHECK,FUNCTIONENTRY --implicit-check-not=@llvm.instrprof.increment
+
+; CHECK: $__llvm_profile_raw_version = comdat any
+; CHECK: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat
+; CHECK: @__profn_test_simple_for_with_bypass = private constant [27 x i8] c"test_simple_for_with_bypass"
+
+define i32 @test_simple_for_with_bypass(i32 %n) {
+entry:
+; CHECK: entry:
+; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
+; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
+; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
+  %mask = and i32 %n, 65535
+  %skip = icmp eq i32 %mask, 0
+  br i1 %skip, label %end, label %for.entry
+
+for.entry:
+; CHECK: for.entry:
+; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
+  br label %for.cond
+
+for.cond:
+; CHECK: for.cond:
+  %i = phi i32 [ 0, %for.entry ], [ %inc1, %for.inc ]
+  %sum = phi i32 [ 1, %for.entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i, %n
+  br i1 %cmp, label %for.body, label %for.end, !prof !1
+
+for.body:
+; CHECK: for.body:
+  %inc = add nsw i32 %sum, 1
+  br label %for.inc
+
+for.inc:
+; CHECK: for.inc:
+; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
+; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
+; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
+  %inc1 = add nsw i32 %i, 1
+  br label %for.cond
+
+for.end:
+; CHECK: for.end:
+; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
+; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
+  br label %end
+
+end:
+; CHECK: end:
+  %final_sum = phi i32 [ %sum, %for.end ], [ 0, %entry ]
+  ret i32 %final_sum
+}
+
+; CHECK: declare void @llvm.instrprof.increment(ptr, i64, i32, i32) #0
+
+!1 = !{!"branch_weights", i32 100000, i32 80}
diff --git a/llvm/test/Transforms/PGOProfile/loop_entries_use.ll b/llvm/test/Transforms/PGOProfile/loop_entries_use.ll
new file mode 100644
index 00000000000000..6e9b6807abf6dd
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/loop_entries_use.ll
@@ -0,0 +1,115 @@
+; RUN: rm -rf %t && split-file %s %t
+
+; RUN: llvm-profdata merge %t/default.proftext -o %t/default.profdata
+; RUN: opt %t/main.ll -passes=pgo-instr-use -pgo-instrument-loop-entries=false -pgo-test-profile-file=%t/default.profdata -S | FileCheck %s --implicit-check-not=@llvm.instrprof.increment
+; RUN: llvm-profdata merge %t/loop_entries.proftext -o %t/loop_entries.profdata
+; RUN: opt %t/main.ll -passes=pgo-instr-use -pgo-instrument-loop-entries=true -pgo-test-profile-file=%t/loop_entries.profdata -S | FileCheck %s --implicit-check-not=@llvm.instrprof.increment
+; RUN: llvm-profdata merge %t/function_entry.proftext -o %t/function_entry.profdata
+; RUN: opt %t/main.ll -passes=pgo-instr-use -pgo-instrument-entry=true -pgo-test-profile-file=%t/function_entry.profdata -S | FileCheck %s --implicit-check-not=@llvm.instrprof.increment
+
+;--- main.ll
+
+define i32 @test_simple_for_with_bypass(i32 %n) {
+; CHECK: define i32 @test_simple_for_with_bypass(i32 %n)
+; CHECK-SAME: !prof ![[ENTRY_COUNT:[0-9]*]]
+entry:
+; CHECK: entry:
+; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
+; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
+; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
+  %mask = and i32 %n, 65535
+  %skip = icmp eq i32 %mask, 0
+  br i1 %skip, label %end, label %for.entry
+; CHECK: br i1 %skip, label %end, label %for.entry
+; CHECK-SAME: !prof ![[BW_FOR_BYPASS:[0-9]+]]
+
+for.entry:
+; CHECK: for.entry:
+; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
+  br label %for.cond
+
+for.cond:
+; CHECK: for.cond:
+  %i = phi i32 [ 0, %for.entry ], [ %inc1, %for.inc ]
+  %sum = phi i32 [ 1, %for.entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i, %n
+  br i1 %cmp, label %for.body, label %for.end, !prof !1
+; CHECK: br i1 %cmp, label %for.body, label %for.end
+; CHECK-SAME: !prof ![[BW_FOR_COND:[0-9]+]]
+
+for.body:
+; CHECK: for.body:
+  %inc = add nsw i32 %sum, 1
+  br label %for.inc
+
+for.inc:
+; CHECK: for.inc:
+; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
+; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
+; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
+  %inc1 = add nsw i32 %i, 1
+  br label %for.cond
+
+for.end:
+; CHECK: for.end:
+; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
+; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
+  br label %end
+
+end:
+; CHECK: end:
+  %final_sum = phi i32 [ %sum, %for.end ], [ 0, %entry ]
+  ret i32 %final_sum
+}
+
+!1 = !{!"branch_weights", i32 100000, i32 80}
+
+; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 12}
+; CHECK: ![[BW_FOR_BYPASS]] = !{!"branch_weights", i32 4, i32 8}
+; CHECK: ![[BW_FOR_COND]] = !{!"branch_weights", i32 123456, i32 8}
+
+;--- default.proftext
+
+# :ir is the flag to indicate this is IR level profile.
+:ir
+test_simple_for_with_bypass
+# Func Hash:
+536873292337293370
+# Num Counters:
+3
+# Counter Values:
+123456
+12
+8
+
+;--- loop_entries.proftext
+
+# :ir is the flag to indicate this is IR level profile.
+:ir
+# Always instrument the loop entry blocks
+:instrument_loop_entries
+test_simple_for_with_bypass
+# Func Hash:
+536873292337293370
+# Num Counters:
+3
+# Counter Values:
+12
+8
+123456
+
+;--- function_entry.proftext
+
+# :ir is the flag to indicate this is IR level profile.
+:ir
+# Always instrument the function entry block
+:entry_first
+test_simple_for_with_bypass
+# Func Hash:
+536873292337293370
+# Num Counters:
+3
+# Counter Values:
+12
+123456
+8
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 8a42e430fb54e8..21715d57b9772e 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -2994,8 +2994,10 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
   std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
   bool IsIR = Reader->isIRLevelProfile();
   OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
-  if (IsIR)
+  if (IsIR) {
     OS << "  entry_first = " << Reader->instrEntryBBEnabled();
+    OS << "  instrument_loop_entries = " << Reader->instrLoopEntriesEnabled();
+  }
   OS << "\n";
   if (ShowAllFunctions || !FuncNameFilter.empty())
     OS << "Functions shown: " << ShownFunctions << "\n";



More information about the llvm-commits mailing list