[compiler-rt] [llvm] [PGO] Add option to always instrumenting loop entries (PR #116789)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 21 04:43:50 PST 2024


https://github.com/ronryvchin updated https://github.com/llvm/llvm-project/pull/116789

>From ddd377ce5eeebddf11d2490449d2b53d1c213ce6 Mon Sep 17 00:00:00 2001
From: Ron Ryvchin <ron.ryvchin at nextsilicon.com>
Date: Sun, 17 Nov 2024 11:11:44 +0200
Subject: [PATCH] [PGO] Supporting code for always instrumenting loop entries

This patch extends the PGO infrastructure with an option to prefer the instrumentation of loop entry blocks.
This option is a generalization of https://github.com/llvm/llvm-project/commit/19fb5b467bb97f95eace1f3637d2d1041cebd3ce,
and helps to cover cases where the loop exit is never executed.
One example where this can occur is an event-handling loop.

Note that this change does NOT change the default behavior.
---
 compiler-rt/include/profile/InstrProfData.inc |  15 ++--
 llvm/include/llvm/ProfileData/InstrProf.h     |   4 +-
 .../llvm/ProfileData/InstrProfData.inc        |  15 ++--
 .../llvm/ProfileData/InstrProfReader.h        |  20 +++++
 .../llvm/ProfileData/InstrProfWriter.h        |   4 +-
 .../llvm/Transforms/Instrumentation/CFGMST.h  |  20 ++++-
 llvm/lib/ProfileData/InstrProfReader.cpp      |   7 +-
 llvm/lib/ProfileData/InstrProfWriter.cpp      |   7 ++
 .../Instrumentation/GCOVProfiling.cpp         |   3 +-
 .../Instrumentation/PGOInstrumentation.cpp    |  72 +++++++++++-----
 .../PGOProfile/Inputs/loop3.proftext          |  12 +++
 .../Inputs/loop3_function_entry.proftext      |  13 +++
 .../Inputs/loop3_loop_entries.proftext        |  13 +++
 .../thinlto_indirect_call_promotion.profraw   | Bin 544 -> 544 bytes
 .../PGOProfile/counter_promo_exit_merge.ll    |   2 +-
 .../instrprof_burst_sampling_fast.ll          |   2 +-
 .../instrprof_burst_sampling_full.ll          |   2 +-
 .../instrprof_burst_sampling_full_intsize.ll  |   2 +-
 .../PGOProfile/instrprof_simple_sampling.ll   |   2 +-
 llvm/test/Transforms/PGOProfile/loop3.ll      |  78 ++++++++++++++++++
 .../Transforms/PGOProfile/vtable_profile.ll   |   2 +-
 llvm/tools/llvm-profdata/llvm-profdata.cpp    |   4 +-
 22 files changed, 252 insertions(+), 47 deletions(-)
 create mode 100644 llvm/test/Transforms/PGOProfile/Inputs/loop3.proftext
 create mode 100644 llvm/test/Transforms/PGOProfile/Inputs/loop3_function_entry.proftext
 create mode 100644 llvm/test/Transforms/PGOProfile/Inputs/loop3_loop_entries.proftext
 create mode 100644 llvm/test/Transforms/PGOProfile/loop3.ll

diff --git a/compiler-rt/include/profile/InstrProfData.inc b/compiler-rt/include/profile/InstrProfData.inc
index c66b0465a0b548..ef99b23cc0ec7a 100644
--- a/compiler-rt/include/profile/InstrProfData.inc
+++ b/compiler-rt/include/profile/InstrProfData.inc
@@ -730,10 +730,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 #define INSTR_PROF_COVMAP_VERSION 6
 
 /* Profile version is always of type uint64_t. Reserve the upper 32 bits in the
- * version for other variants of profile. We set the 8th most significant bit 
- * (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation
+ * version for other variants of profile. We set the 9th most significant bit
+ * (i.e. bit 55) to 1 to indicate if this is an IR-level instrumentation
  * generated profile, and 0 if this is a Clang FE generated profile.
- * 1 in bit 57 indicates there are context-sensitive records in the profile.
+ * 1 in bit 56 indicates there are context-sensitive records in the profile.
+ * The 57th bit indicates whether to always instrument function entry blocks.
+ * The 58th bit indicates whether to always instrument loop entry blocks.
  * The 59th bit indicates whether to use debug info to correlate profiles.
  * The 60th bit indicates single byte coverage instrumentation.
  * The 61st bit indicates function entry instrumentation only.
@@ -742,9 +744,10 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
  */
 #define VARIANT_MASKS_ALL 0xffffffff00000000ULL
 #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
-#define VARIANT_MASK_IR_PROF (0x1ULL << 56)
-#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
-#define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58)
+#define VARIANT_MASK_IR_PROF (0x1ULL << 55)
+#define VARIANT_MASK_CSIR_PROF (0x1ULL << 56)
+#define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 57)
+#define VARIANT_MASK_INSTR_LOOP_ENTRIES (0x1ULL << 58)
 #define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59)
 #define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60)
 #define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61)
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index c5f7800097807d..7133c0c6a302c7 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -344,7 +344,9 @@ enum class InstrProfKind {
   MemProf = 0x40,
   // A temporal profile.
   TemporalProfile = 0x80,
-  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/TemporalProfile)
+  // A profile with loop entry basic blocks instrumentation.
+  LoopEntriesInstrumentation = 0x100,
+  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/LoopEntriesInstrumentation)
 };
 
 const std::error_category &instrprof_category();
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index c66b0465a0b548..ef99b23cc0ec7a 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -730,10 +730,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
 #define INSTR_PROF_COVMAP_VERSION 6
 
 /* Profile version is always of type uint64_t. Reserve the upper 32 bits in the
- * version for other variants of profile. We set the 8th most significant bit 
- * (i.e. bit 56) to 1 to indicate if this is an IR-level instrumentation
+ * version for other variants of profile. We set the 9th most significant bit
+ * (i.e. bit 55) to 1 to indicate if this is an IR-level instrumentation
  * generated profile, and 0 if this is a Clang FE generated profile.
- * 1 in bit 57 indicates there are context-sensitive records in the profile.
+ * 1 in bit 56 indicates there are context-sensitive records in the profile.
+ * The 57th bit indicates whether to always instrument function entry blocks.
+ * The 58th bit indicates whether to always instrument loop entry blocks.
  * The 59th bit indicates whether to use debug info to correlate profiles.
  * The 60th bit indicates single byte coverage instrumentation.
  * The 61st bit indicates function entry instrumentation only.
@@ -742,9 +744,10 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
  */
 #define VARIANT_MASKS_ALL 0xffffffff00000000ULL
 #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
-#define VARIANT_MASK_IR_PROF (0x1ULL << 56)
-#define VARIANT_MASK_CSIR_PROF (0x1ULL << 57)
-#define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 58)
+#define VARIANT_MASK_IR_PROF (0x1ULL << 55)
+#define VARIANT_MASK_CSIR_PROF (0x1ULL << 56)
+#define VARIANT_MASK_INSTR_ENTRY (0x1ULL << 57)
+#define VARIANT_MASK_INSTR_LOOP_ENTRIES (0x1ULL << 58)
 #define VARIANT_MASK_DBG_CORRELATE (0x1ULL << 59)
 #define VARIANT_MASK_BYTE_COVERAGE (0x1ULL << 60)
 #define VARIANT_MASK_FUNCTION_ENTRY_ONLY (0x1ULL << 61)
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 058b9a1ce02e0b..fc678fe19eb1ea 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -123,6 +123,8 @@ class InstrProfReader {
 
   virtual bool instrEntryBBEnabled() const = 0;
 
+  virtual bool instrLoopEntriesEnabled() const = 0;
+
   /// Return true if the profile has single byte counters representing coverage.
   virtual bool hasSingleByteCoverage() const = 0;
 
@@ -274,6 +276,11 @@ class TextInstrProfReader : public InstrProfReader {
                              InstrProfKind::FunctionEntryInstrumentation);
   }
 
+  bool instrLoopEntriesEnabled() const override {
+    return static_cast<bool>(ProfileKind &
+                             InstrProfKind::LoopEntriesInstrumentation);
+  }
+
   bool hasSingleByteCoverage() const override {
     return static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage);
   }
@@ -398,6 +405,10 @@ class RawInstrProfReader : public InstrProfReader {
     return (Version & VARIANT_MASK_INSTR_ENTRY) != 0;
   }
 
+  bool instrLoopEntriesEnabled() const override {
+    return (Version & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0;
+  }
+
   bool hasSingleByteCoverage() const override {
     return (Version & VARIANT_MASK_BYTE_COVERAGE) != 0;
   }
@@ -564,6 +575,7 @@ struct InstrProfReaderIndexBase {
   virtual bool isIRLevelProfile() const = 0;
   virtual bool hasCSIRLevelProfile() const = 0;
   virtual bool instrEntryBBEnabled() const = 0;
+  virtual bool instrLoopEntriesEnabled() const = 0;
   virtual bool hasSingleByteCoverage() const = 0;
   virtual bool functionEntryOnly() const = 0;
   virtual bool hasMemoryProfile() const = 0;
@@ -628,6 +640,10 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase {
     return (FormatVersion & VARIANT_MASK_INSTR_ENTRY) != 0;
   }
 
+  bool instrLoopEntriesEnabled() const override {
+    return (FormatVersion & VARIANT_MASK_INSTR_LOOP_ENTRIES) != 0;
+  }
+
   bool hasSingleByteCoverage() const override {
     return (FormatVersion & VARIANT_MASK_BYTE_COVERAGE) != 0;
   }
@@ -753,6 +769,10 @@ class IndexedInstrProfReader : public InstrProfReader {
     return Index->instrEntryBBEnabled();
   }
 
+  bool instrLoopEntriesEnabled() const override {
+    return Index->instrLoopEntriesEnabled();
+  }
+
   bool hasSingleByteCoverage() const override {
     return Index->hasSingleByteCoverage();
   }
diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h
index fa30926c662587..fdb51c4ab42182 100644
--- a/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -190,7 +190,9 @@ class InstrProfWriter {
       return make_error<InstrProfError>(instrprof_error::unsupported_version);
     }
     if (testIncompatible(InstrProfKind::FunctionEntryOnly,
-                         InstrProfKind::FunctionEntryInstrumentation)) {
+                         InstrProfKind::FunctionEntryInstrumentation) ||
+        testIncompatible(InstrProfKind::FunctionEntryOnly,
+                         InstrProfKind::LoopEntriesInstrumentation)) {
       return make_error<InstrProfError>(
           instrprof_error::unsupported_version,
           "cannot merge FunctionEntryOnly profiles and BB profiles together");
diff --git a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
index 35b3d615e3844a..8540f3103c23f5 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
@@ -19,6 +19,7 @@
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/Support/BranchProbability.h"
@@ -52,10 +53,14 @@ template <class Edge, class BBInfo> class CFGMST {
 
   BranchProbabilityInfo *const BPI;
   BlockFrequencyInfo *const BFI;
+  LoopInfo *const LI;
 
   // If function entry will be always instrumented.
   const bool InstrumentFuncEntry;
 
+  // If true loop entries will be always instrumented.
+  const bool InstrumentLoopEntries;
+
   // Find the root group of the G and compress the path from G to the root.
   BBInfo *findAndCompressGroup(BBInfo *G) {
     if (G->Group != G)
@@ -154,6 +159,11 @@ template <class Edge, class BBInfo> class CFGMST {
           }
           if (BPI != nullptr)
             Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
+          // If InstrumentLoopEntries is on and TargetBB is a loop head (i.e.,
+          // the current edge leads to a loop), set Weight to be minimal, so
+          // that the edge won't be chosen for the MST and will be instrumented.
+          if (InstrumentLoopEntries && LI->isLoopHeader(TargetBB))
+            Weight = 0;
           if (Weight == 0)
             Weight++;
           auto *E = &addEdge(&BB, TargetBB, Weight);
@@ -291,10 +301,14 @@ template <class Edge, class BBInfo> class CFGMST {
     return *AllEdges.back();
   }
 
-  CFGMST(Function &Func, bool InstrumentFuncEntry,
+  CFGMST(Function &Func, bool InstrumentFuncEntry, bool InstrumentLoopEntries,
          BranchProbabilityInfo *BPI = nullptr,
-         BlockFrequencyInfo *BFI = nullptr)
-      : F(Func), BPI(BPI), BFI(BFI), InstrumentFuncEntry(InstrumentFuncEntry) {
+         BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr)
+      : F(Func), BPI(BPI), BFI(BFI), LI(LI),
+        InstrumentFuncEntry(InstrumentFuncEntry),
+        InstrumentLoopEntries(InstrumentLoopEntries) {
+    assert(!(InstrumentLoopEntries && !LI) &&
+           "expected a LoopInfo to instrumenting loop entries");
     buildEdges();
     sortEdgesByWeight();
     computeMinimumSpanningTree();
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index cd2c5d72ea5222..d9eee6ec4081a1 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -53,6 +53,9 @@ static InstrProfKind getProfileKindFromVersion(uint64_t Version) {
   if (Version & VARIANT_MASK_INSTR_ENTRY) {
     ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
   }
+  if (Version & VARIANT_MASK_INSTR_LOOP_ENTRIES) {
+    ProfileKind |= InstrProfKind::LoopEntriesInstrumentation;
+  }
   if (Version & VARIANT_MASK_BYTE_COVERAGE) {
     ProfileKind |= InstrProfKind::SingleByteCoverage;
   }
@@ -262,7 +265,9 @@ Error TextInstrProfReader::readHeader() {
     } else if (Str.equals_insensitive("entry_first"))
       ProfileKind |= InstrProfKind::FunctionEntryInstrumentation;
     else if (Str.equals_insensitive("not_entry_first"))
-      ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation;
+      ProfileKind &= ~InstrProfKind::FunctionEntryInstrumentation;
+    else if (Str.equals_insensitive("instrument_loop_entries"))
+      ProfileKind |= InstrProfKind::LoopEntriesInstrumentation;
     else if (Str.equals_insensitive("single_byte_coverage"))
       ProfileKind |= InstrProfKind::SingleByteCoverage;
     else if (Str.equals_insensitive("temporal_prof_traces")) {
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 87a538f35c7865..f1ac5830fcd385 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -909,6 +909,9 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) {
   if (static_cast<bool>(ProfileKind &
                         InstrProfKind::FunctionEntryInstrumentation))
     Header.Version |= VARIANT_MASK_INSTR_ENTRY;
+  if (static_cast<bool>(ProfileKind &
+                        InstrProfKind::LoopEntriesInstrumentation))
+    Header.Version |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
   if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
     Header.Version |= VARIANT_MASK_BYTE_COVERAGE;
   if (static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly))
@@ -1152,6 +1155,10 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) {
   if (static_cast<bool>(ProfileKind &
                         InstrProfKind::FunctionEntryInstrumentation))
     OS << "# Always instrument the function entry block\n:entry_first\n";
+  if (static_cast<bool>(ProfileKind &
+                        InstrProfKind::LoopEntriesInstrumentation))
+    OS << "# Always instrument the loop entry "
+          "blocks\n:instrument_loop_entries\n";
   if (static_cast<bool>(ProfileKind & InstrProfKind::SingleByteCoverage))
     OS << "# Instrument block coverage\n:single_byte_coverage\n";
   InstrProfSymtab Symtab;
diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index 2ea89be40a3d46..f9be7f933d31e4 100644
--- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -820,7 +820,8 @@ bool GCOVProfiler::emitProfileNotes(
       SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
                                    BFI);
 
-      CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry_=*/false, BPI, BFI);
+      CFGMST<Edge, BBInfo> MST(F, /*InstrumentFuncEntry=*/false,
+                               /*InstrumentLoopEntries=*/false, BPI, BFI);
 
       // getInstrBB can split basic blocks and push elements to AllEdges.
       for (size_t I : llvm::seq<size_t>(0, MST.numEdges())) {
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 4d8141431a0c19..471086ce3a7511 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -259,6 +259,11 @@ static cl::opt<bool> PGOInstrumentEntry(
     "pgo-instrument-entry", cl::init(false), cl::Hidden,
     cl::desc("Force to instrument function entry basicblock."));
 
+static cl::opt<bool>
+    PGOInstrumentLoopEntries("pgo-instrument-loop-entries", cl::init(false),
+                             cl::Hidden,
+                             cl::desc("Force to instrument loop entries."));
+
 static cl::opt<bool> PGOFunctionEntryCoverage(
     "pgo-function-entry-coverage", cl::Hidden,
     cl::desc(
@@ -359,6 +364,7 @@ class FunctionInstrumenter final {
   std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
   BranchProbabilityInfo *const BPI;
   BlockFrequencyInfo *const BFI;
+  LoopInfo *const LI;
 
   const PGOInstrumentationType InstrumentationType;
 
@@ -376,14 +382,17 @@ class FunctionInstrumenter final {
            InstrumentationType == PGOInstrumentationType::CTXPROF;
   }
 
+  bool shouldInstrumentLoopEntries() const { return PGOInstrumentLoopEntries; }
+
 public:
   FunctionInstrumenter(
       Module &M, Function &F, TargetLibraryInfo &TLI,
       std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
       BranchProbabilityInfo *BPI = nullptr, BlockFrequencyInfo *BFI = nullptr,
+      LoopInfo *LI = nullptr,
       PGOInstrumentationType InstrumentationType = PGOInstrumentationType::FDO)
       : M(M), F(F), TLI(TLI), ComdatMembers(ComdatMembers), BPI(BPI), BFI(BFI),
-        InstrumentationType(InstrumentationType) {}
+        LI(LI), InstrumentationType(InstrumentationType) {}
 
   void instrument();
 };
@@ -439,6 +448,8 @@ createIRLevelProfileFlagVar(Module &M,
   if (PGOInstrumentEntry ||
       InstrumentationType == PGOInstrumentationType::CTXPROF)
     ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
+  if (PGOInstrumentLoopEntries)
+    ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES;
   if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
     ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
   if (PGOFunctionEntryCoverage)
@@ -625,12 +636,13 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
       Function &Func, TargetLibraryInfo &TLI,
       std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
       bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
-      BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
-      bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
+      BlockFrequencyInfo *BFI = nullptr, LoopInfo *LI = nullptr,
+      bool IsCS = false, bool InstrumentFuncEntry = true,
+      bool InstrumentLoopEntries = false, bool HasSingleByteCoverage = false)
       : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
         TLI(TLI), ValueSites(IPVK_Last + 1),
         SIVisitor(Func, HasSingleByteCoverage),
-        MST(F, InstrumentFuncEntry, BPI, BFI),
+        MST(F, InstrumentFuncEntry, InstrumentLoopEntries, BPI, BFI, LI),
         BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
     if (BCI && PGOViewBlockCoverageGraph)
       BCI->viewBlockCoverageGraph();
@@ -916,9 +928,10 @@ void FunctionInstrumenter::instrument() {
 
   const bool IsCtxProf = InstrumentationType == PGOInstrumentationType::CTXPROF;
   FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
-      F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI,
+      F, TLI, ComdatMembers, /*CreateGlobalVar=*/!IsCtxProf, BPI, BFI, LI,
       InstrumentationType == PGOInstrumentationType::CSFDO,
-      shouldInstrumentEntryBB(), PGOBlockCoverage);
+      shouldInstrumentEntryBB(), shouldInstrumentLoopEntries(),
+      PGOBlockCoverage);
 
   auto *const Name = IsCtxProf ? cast<GlobalValue>(&F) : FuncInfo.FuncNameVar;
   auto *const CFGHash =
@@ -1136,11 +1149,13 @@ class PGOUseFunc {
   PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
              std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
              BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
-             ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
+             LoopInfo *LI, ProfileSummaryInfo *PSI, bool IsCS,
+             bool InstrumentFuncEntry, bool InstrumentLoopEntries,
              bool HasSingleByteCoverage)
       : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
-        FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
-                 InstrumentFuncEntry, HasSingleByteCoverage),
+        FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, LI, IsCS,
+                 InstrumentFuncEntry, InstrumentLoopEntries,
+                 HasSingleByteCoverage),
         FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
 
   void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
@@ -1923,6 +1938,7 @@ static bool InstrumentAllFunctions(
     Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
     function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
+    function_ref<LoopInfo *(Function &)> LookupLI,
     PGOInstrumentationType InstrumentationType) {
   // For the context-sensitve instrumentation, we should have a separated pass
   // (before LTO/ThinLTO linking) to create these variables.
@@ -1943,10 +1959,11 @@ static bool InstrumentAllFunctions(
   for (auto &F : M) {
     if (skipPGOGen(F))
       continue;
-    auto &TLI = LookupTLI(F);
-    auto *BPI = LookupBPI(F);
-    auto *BFI = LookupBFI(F);
-    FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI,
+    TargetLibraryInfo &TLI = LookupTLI(F);
+    BranchProbabilityInfo *BPI = LookupBPI(F);
+    BlockFrequencyInfo *BFI = LookupBFI(F);
+    LoopInfo *LI = LookupLI(F);
+    FunctionInstrumenter FI(M, F, TLI, ComdatMembers, BPI, BFI, LI,
                             InstrumentationType);
     FI.instrument();
   }
@@ -1980,8 +1997,11 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
   auto LookupBFI = [&FAM](Function &F) {
     return &FAM.getResult<BlockFrequencyAnalysis>(F);
   };
+  auto LookupLI = [&FAM](Function &F) {
+    return &FAM.getResult<LoopAnalysis>(F);
+  };
 
-  if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI,
+  if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, LookupLI,
                               InstrumentationType))
     return PreservedAnalyses::all();
 
@@ -2116,7 +2136,8 @@ static bool annotateAllFunctions(
     function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
     function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
-    ProfileSummaryInfo *PSI, bool IsCS) {
+    function_ref<LoopInfo *(Function &)> LookupLI, ProfileSummaryInfo *PSI,
+    bool IsCS) {
   LLVM_DEBUG(dbgs() << "Read in profile counters: ");
   auto &Ctx = M.getContext();
   // Read the counter array from file.
@@ -2181,22 +2202,27 @@ static bool annotateAllFunctions(
   bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
   if (PGOInstrumentEntry.getNumOccurrences() > 0)
     InstrumentFuncEntry = PGOInstrumentEntry;
+  bool InstrumentLoopEntries = PGOReader->instrLoopEntriesEnabled();
+  if (PGOInstrumentLoopEntries.getNumOccurrences() > 0)
+    InstrumentLoopEntries = PGOInstrumentLoopEntries;
 
   bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
   for (auto &F : M) {
     if (skipPGOUse(F))
       continue;
-    auto &TLI = LookupTLI(F);
-    auto *BPI = LookupBPI(F);
-    auto *BFI = LookupBFI(F);
+    TargetLibraryInfo &TLI = LookupTLI(F);
+    BranchProbabilityInfo *BPI = LookupBPI(F);
+    BlockFrequencyInfo *BFI = LookupBFI(F);
+    LoopInfo *LI = LookupLI(F);
     if (!HasSingleByteCoverage) {
       // Split indirectbr critical edges here before computing the MST rather
       // than later in getInstrBB() to avoid invalidating it.
       SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
                                    BFI);
     }
-    PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
-                    InstrumentFuncEntry, HasSingleByteCoverage);
+    PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, LI, PSI, IsCS,
+                    InstrumentFuncEntry, InstrumentLoopEntries,
+                    HasSingleByteCoverage);
     if (HasSingleByteCoverage) {
       Func.populateCoverage(PGOReader.get());
       continue;
@@ -2335,10 +2361,14 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
   auto LookupBFI = [&FAM](Function &F) {
     return &FAM.getResult<BlockFrequencyAnalysis>(F);
   };
+  auto LookupLI = [&FAM](Function &F) {
+    return &FAM.getResult<LoopAnalysis>(F);
+  };
 
   auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
   if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
-                            LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
+                            LookupTLI, LookupBPI, LookupBFI, LookupLI, PSI,
+                            IsCS))
     return PreservedAnalyses::all();
 
   return PreservedAnalyses::none();
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/loop3.proftext b/llvm/test/Transforms/PGOProfile/Inputs/loop3.proftext
new file mode 100644
index 00000000000000..5f29fed61a3671
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/Inputs/loop3.proftext
@@ -0,0 +1,12 @@
+# :ir is the flag to indicate this is IR level profile.
+:ir
+test_simple_for_with_bypass
+# Func Hash:
+536873292337293370
+# Num Counters:
+3
+# Counter Values:
+123456
+12
+8
+
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/loop3_function_entry.proftext b/llvm/test/Transforms/PGOProfile/Inputs/loop3_function_entry.proftext
new file mode 100644
index 00000000000000..13b326722d072b
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/Inputs/loop3_function_entry.proftext
@@ -0,0 +1,13 @@
+# :ir is the flag to indicate this is IR level profile.
+:ir
+# Always instrument the function entry block
+:entry_first
+test_simple_for_with_bypass
+# Func Hash:
+536873292337293370
+# Num Counters:
+3
+# Counter Values:
+12
+123456
+8
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/loop3_loop_entries.proftext b/llvm/test/Transforms/PGOProfile/Inputs/loop3_loop_entries.proftext
new file mode 100644
index 00000000000000..e95f155717fd5f
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/Inputs/loop3_loop_entries.proftext
@@ -0,0 +1,13 @@
+# :ir is the flag to indicate this is IR level profile.
+:ir
+# Always instrument the loop entry blocks
+:instrument_loop_entries
+test_simple_for_with_bypass
+# Func Hash:
+536873292337293370
+# Num Counters:
+3
+# Counter Values:
+12
+8
+123456
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.profraw b/llvm/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.profraw
index 3daa98f937b691880ffff203c9426bfacddf749d..a37503edc6b648623db64c739d50fa8bc242a8db 100644
GIT binary patch
delta 125
zcmZ3$vVcXPu_!ISs37M*7Xt`1FiaHi;0sWg-1KJU|NmLmIkm!J6Vt5N98$onWmaqf
s5Y{s*z63Cffnmb~klbWFMkBTjV4=xrjI$IV>KGUp!R9hBKukdJ0bGbAaR2}S

delta 125
zcmZ3$vVcXPu_!ISs37M*7XuhDP89Iq%eeITiS(-f|2Nl3OFs#nm}bQmunNptX2q5O
sVLh|rD*&??7-s0n098%aV>Dtr02Z2@#yCp>qK<)q5n?U_!~_H%00CtuO#lD@

diff --git a/llvm/test/Transforms/PGOProfile/counter_promo_exit_merge.ll b/llvm/test/Transforms/PGOProfile/counter_promo_exit_merge.ll
index c2af706df787e1..42cb492a552fcb 100644
--- a/llvm/test/Transforms/PGOProfile/counter_promo_exit_merge.ll
+++ b/llvm/test/Transforms/PGOProfile/counter_promo_exit_merge.ll
@@ -3,7 +3,7 @@
 $__llvm_profile_raw_version = comdat any
 
 @g = common local_unnamed_addr global i32 0, align 4
- at __llvm_profile_raw_version = constant i64 72057594037927940, comdat
+ at __llvm_profile_raw_version = constant i64 36028797018963972, comdat
 @__profn_foo = private constant [3 x i8] c"foo"
 
 define void @foo(i32 %arg) local_unnamed_addr {
diff --git a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll
index 56d8364d8f5431..413165e385e542 100644
--- a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll
+++ b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_fast.ll
@@ -9,7 +9,7 @@ $__llvm_profile_raw_version = comdat any
 
 ; SAMPLE-VAR: $__llvm_profile_sampling = comdat any
 
- at __llvm_profile_raw_version = constant i64 72057594037927940, comdat
+ at __llvm_profile_raw_version = constant i64 36028797018963972, comdat
 @__profn_f = private constant [1 x i8] c"f"
 
 ; SAMPLE-VAR: @__llvm_profile_sampling = thread_local global i16 0, comdat
diff --git a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll
index 726df2886ca840..1d055ee9fe7787 100644
--- a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll
+++ b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
 
 $__llvm_profile_raw_version = comdat any
 
- at __llvm_profile_raw_version = constant i64 72057594037927940, comdat
+ at __llvm_profile_raw_version = constant i64 36028797018963972, comdat
 @__profn_f = private constant [1 x i8] c"f"
 
 define void @f() {
diff --git a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll
index 2d6323c1034715..7a7e8d16494961 100644
--- a/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll
+++ b/llvm/test/Transforms/PGOProfile/instrprof_burst_sampling_full_intsize.ll
@@ -6,7 +6,7 @@ target triple = "x86_64-unknown-linux-gnu"
 
 $__llvm_profile_raw_version = comdat any
 
- at __llvm_profile_raw_version = constant i64 72057594037927940, comdat
+ at __llvm_profile_raw_version = constant i64 36028797018963972, comdat
 @__profn_f = private constant [1 x i8] c"f"
 
 define void @f() {
diff --git a/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll b/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll
index 5ef93af881c4b3..bd5b4ea68e1b8b 100644
--- a/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll
+++ b/llvm/test/Transforms/PGOProfile/instrprof_simple_sampling.ll
@@ -7,7 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
 
 $__llvm_profile_raw_version = comdat any
 
- at __llvm_profile_raw_version = constant i64 72057594037927940, comdat
+ at __llvm_profile_raw_version = constant i64 36028797018963972, comdat
 @__profn_f = private constant [1 x i8] c"f"
 
 define void @f() {
diff --git a/llvm/test/Transforms/PGOProfile/loop3.ll b/llvm/test/Transforms/PGOProfile/loop3.ll
new file mode 100644
index 00000000000000..22243dc9b4d276
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/loop3.ll
@@ -0,0 +1,78 @@
+; RUN: opt %s -passes=pgo-instr-gen -pgo-instrument-loop-entries=false -S | FileCheck %s --check-prefixes=GEN,NOTLOOPENTRIES
+; RUN: llvm-profdata merge %S/Inputs/loop3.proftext -o %t.profdata
+; RUN: opt %s -passes=pgo-instr-use -pgo-instrument-loop-entries=false -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+; RUN: opt %s -passes=pgo-instr-gen -pgo-instrument-loop-entries=true -S | FileCheck %s --check-prefixes=GEN,LOOPENTRIES
+; RUN: llvm-profdata merge %S/Inputs/loop3_loop_entries.proftext -o %t.profdata
+; RUN: opt %s -passes=pgo-instr-use -pgo-instrument-loop-entries=true -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+; RUN: opt %s -passes=pgo-instr-gen -pgo-instrument-entry=true -S | FileCheck %s --check-prefixes=GEN,FUNCTIONENTRY
+; RUN: llvm-profdata merge %S/Inputs/loop3_function_entry.proftext -o %t.profdata
+; RUN: opt %s -passes=pgo-instr-use -pgo-instrument-entry=true -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+
+; GEN: $__llvm_profile_raw_version = comdat any
+; GEN: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat
+; GEN: @__profn_test_simple_for_with_bypass = private constant [27 x i8] c"test_simple_for_with_bypass"
+
+define i32 @test_simple_for_with_bypass(i32 %n) {
+; USE: define i32 @test_simple_for_with_bypass(i32 %n)
+; USE-SAME: !prof ![[ENTRY_COUNT:[0-9]*]]
+entry:
+; GEN: entry:
+; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
+; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
+; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
+  %mask = and i32 %n, 65535
+  %skip = icmp eq i32 %mask, 0
+  br i1 %skip, label %end, label %for.entry
+; USE: br i1 %skip, label %end, label %for.entry
+; USE-SAME: !prof ![[BW_FOR_BYPASS:[0-9]+]]
+
+for.entry:
+; GEN: for.entry:
+; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
+; NOTLOOPENTRIES-NOT: call void @llvm.instrprof.increment
+; FUNCTIONENTRY-NOT: call void @llvm.instrprof.increment
+  br label %for.cond
+
+for.cond:
+; GEN: for.cond:
+; GEN-NOT: call void @llvm.instrprof.increment
+  %i = phi i32 [ 0, %for.entry ], [ %inc1, %for.inc ]
+  %sum = phi i32 [ 1, %for.entry ], [ %inc, %for.inc ]
+  %cmp = icmp slt i32 %i, %n
+  br i1 %cmp, label %for.body, label %for.end, !prof !1
+; USE: br i1 %cmp, label %for.body, label %for.end
+; USE-SAME: !prof ![[BW_FOR_COND:[0-9]+]]
+
+for.body:
+; GEN: for.body:
+; GEN-NOT: call void @llvm.instrprof.increment
+  %inc = add nsw i32 %sum, 1
+  br label %for.inc
+
+for.inc:
+; GEN: for.inc:
+; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 0)
+; LOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
+; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 1)
+  %inc1 = add nsw i32 %i, 1
+  br label %for.cond
+
+for.end:
+; GEN: for.end:
+; NOTLOOPENTRIES: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
+; FUNCTIONENTRY: call void @llvm.instrprof.increment(ptr @__profn_test_simple_for_with_bypass, i64 {{[0-9]+}}, i32 3, i32 2)
+; LOOPENTRIES-NOT: call void @llvm.instrprof.increment
+  br label %end
+
+end:
+; GEN: end:
+; GEN-NOT: call void @llvm.instrprof.increment
+  %final_sum = phi i32 [ %sum, %for.end ], [ 0, %entry ]
+  ret i32 %final_sum
+}
+
+!1 = !{!"branch_weights", i32 100000, i32 80}
+
+; USE: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 12}
+; USE: ![[BW_FOR_BYPASS]] = !{!"branch_weights", i32 4, i32 8}
+; USE: ![[BW_FOR_COND]] = !{!"branch_weights", i32 123456, i32 8}
diff --git a/llvm/test/Transforms/PGOProfile/vtable_profile.ll b/llvm/test/Transforms/PGOProfile/vtable_profile.ll
index aae1e2d8b4e491..75fd1e31acfa0d 100644
--- a/llvm/test/Transforms/PGOProfile/vtable_profile.ll
+++ b/llvm/test/Transforms/PGOProfile/vtable_profile.ll
@@ -49,7 +49,7 @@ target triple = "x86_64-unknown-linux-gnu"
 @llvm.compiler.used = appending global [1 x ptr] [ptr @_ZTV5Base1], section "llvm.metadata"
 
 ; GEN: __llvm_profile_raw_version = comdat any
-; GEN: __llvm_profile_raw_version = hidden constant i64 72057594037927946, comdat
+; GEN: __llvm_profile_raw_version = hidden constant i64 36028797018963978, comdat
 ; GEN: __profn__Z4funci = private constant [8 x i8] c"_Z4funci"
 
 ; LOWER: $__profvt__ZTV7Derived = comdat nodeduplicate
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 8a42e430fb54e8..21715d57b9772e 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -2994,8 +2994,10 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) {
   std::unique_ptr<ProfileSummary> PS(Builder.getSummary());
   bool IsIR = Reader->isIRLevelProfile();
   OS << "Instrumentation level: " << (IsIR ? "IR" : "Front-end");
-  if (IsIR)
+  if (IsIR) {
     OS << "  entry_first = " << Reader->instrEntryBBEnabled();
+    OS << "  instrument_loop_entries = " << Reader->instrLoopEntriesEnabled();
+  }
   OS << "\n";
   if (ShowAllFunctions || !FuncNameFilter.empty())
     OS << "Functions shown: " << ShownFunctions << "\n";



More information about the llvm-commits mailing list