[llvm-branch-commits] [llvm] [BOLT] Drop high discrepancy profiles in matching (PR #95156)

Fri Jun 14 11:31:22 PDT 2024

https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95156

>From aa441dc0163d3d0f63de1e4dd1fa359180f82f1f Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 11 Jun 2024 11:43:13 -0700
Subject: [PATCH 1/5] Summary: Functions with little exact matching

Created using spr 1.3.4
---
 bolt/docs/CommandLineArgumentReference.md | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index 8887d1f5d5bd4..bdc1d9dfd735c 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -614,6 +614,17 @@
 
 - `--lite-threshold-pct=<uint>`
 
+  Threshold (in percent) of matched profile at which stale profile inference is
+  applied to functions. Argument corresponds to the sum of matched execution
+  counts of function blocks divided by the sum of execution counts of function
+  blocks. E.g if the sum of a function blocks' execution counts is 100, the sum
+  of the function blocks' matched execution counts is 10, and the argument is 15
+  (15%), profile inference will not be applied to that function. A higher
+  threshold will correlate with fewer functions to process in cases of stale
+  profile. Default set to %5.
+
+- `--matched-profile-threshold=<uint>`
+
   Threshold (in percent) for selecting functions to process in lite mode. Higher
   threshold means fewer functions to process. E.g threshold of 90 means only top
   10 percent of functions with profile will be processed.
@@ -1161,4 +1172,4 @@
 
 - `--print-options`
 
-  Print non-default options after command line parsing
\ No newline at end of file
+  Print non-default options after command line parsing

>From 46fa37a054a129ca36e7b6ae126273e40fddea98 Mon Sep 17 00:00:00 2001
From: shaw young <58664393+shawbyoung at users.noreply.github.com>
Date: Tue, 11 Jun 2024 14:32:40 -0700
Subject: [PATCH 2/5] Update SampleProfileInference.h

---
 llvm/include/llvm/Transforms/Utils/SampleProfileInference.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
index c654715c0ae9f..9ccbd0fa88f3d 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -58,6 +58,7 @@ struct FlowFunction {
   std::vector<FlowJump> Jumps;
   /// The index of the entry block.
   uint64_t Entry{0};
+  uint64_t Sink{UINT64_MAX};
   // Matched execution count for the function.
   uint64_t MatchedExecCount{0};
 };

>From d532514257feb5e86232e76c437c99a41d5f2cea Mon Sep 17 00:00:00 2001
From: shaw young <58664393+shawbyoung at users.noreply.github.com>
Date: Tue, 11 Jun 2024 14:39:28 -0700
Subject: [PATCH 3/5] Update StaleProfileMatching.cpp

---
 bolt/lib/Profile/StaleProfileMatching.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 41afa6b4bbb19..47335163263a4 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -604,8 +604,8 @@ bool canApplyInference(const FlowFunction &Func,
   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
     return false;
 
-  if (Func.MatchedExecCount / YamlBF.ExecCount >=
-      opts::MatchedProfileThreshold / 100)
+  if ((double)Func.MatchedExecCount / YamlBF.ExecCount >=
+      opts::MatchedProfileThreshold / 100.0)
     return false;
 
   bool HasExitBlocks = llvm::any_of(

>From 3fc6d72d866333d8ce964fdfaa748791d4f8d2b4 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 14 Jun 2024 08:38:19 -0700
Subject: [PATCH 4/5] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp     | 37 +++++++++++++++----
 .../Transforms/Utils/SampleProfileInference.h |  3 --
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 47335163263a4..cb356afdd2948 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -53,9 +53,9 @@ cl::opt<bool>
 
 cl::opt<unsigned> MatchedProfileThreshold(
     "matched-profile-threshold",
-    cl::desc("Percentage threshold of matched execution counts at which stale "
+    cl::desc("Percentage threshold of matched basic blocks at which stale "
              "profile inference is executed."),
-    cl::init(5), cl::Hidden, cl::cat(BoltOptCategory));
+    cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
 
 cl::opt<unsigned> StaleMatchingMaxFuncSize(
     "stale-matching-max-func-size",
@@ -186,6 +186,17 @@ struct BlendedBlockHash {
   uint8_t SuccHash{0};
 };
 
+/// A data object containing function matching information.
+struct FunctionMatchingData {
+public:
+  /// The number of blocks matched exactly.
+  uint64_t MatchedExactBlocks{0};
+  /// The number of blocks matched loosely.
+  uint64_t MatchedLooseBlocks{0};
+  /// The number of execution counts matched.
+  uint64_t MatchedExecCounts{0};
+};
+
 /// The object is used to identify and match basic blocks in a BinaryFunction
 /// given their hashes computed on a binary built from several revisions behind
 /// release.
@@ -417,7 +428,9 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 void matchWeightsByHashes(BinaryContext &BC,
                           const BinaryFunction::BasicBlockOrderType &BlockOrder,
                           const yaml::bolt::BinaryFunctionProfile &YamlBF,
-                          FlowFunction &Func) {
+                          FlowFunction &Func,
+                          FunctionMatchingData &FuncMatchingData
+                          ) {
   assert(Func.Blocks.size() == BlockOrder.size() + 2);
 
   std::vector<FlowBlock *> Blocks;
@@ -457,9 +470,11 @@ void matchWeightsByHashes(BinaryContext &BC,
       if (Matcher.isHighConfidenceMatch(BinHash, YamlHash)) {
         ++BC.Stats.NumMatchedBlocks;
         BC.Stats.MatchedSampleCount += YamlBB.ExecCount;
-        Func.MatchedExecCount += YamlBB.ExecCount;
+        FuncMatchingData.MatchedExecCounts += YamlBB.ExecCount;
+        FuncMatchingData.MatchedExactBlocks += 1;
         LLVM_DEBUG(dbgs() << "  exact match\n");
       } else {
+        FuncMatchingData.MatchedLooseBlocks += 1;
         LLVM_DEBUG(dbgs() << "  loose match\n");
       }
       if (YamlBB.NumInstructions == BB->size())
@@ -600,14 +615,17 @@ void preprocessUnreachableBlocks(FlowFunction &Func) {
 /// Currently we skip inference for (very) large instances and for instances
 /// having "unexpected" control flow (e.g., having no sink basic blocks).
 bool canApplyInference(const FlowFunction &Func,
-                       const yaml::bolt::BinaryFunctionProfile &YamlBF) {
+                       const yaml::bolt::BinaryFunctionProfile &YamlBF,
+                       const FunctionMatchingData &FuncMatchingData) {
   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
     return false;
 
-  if ((double)Func.MatchedExecCount / YamlBF.ExecCount >=
+  if ((double)FuncMatchingData.MatchedExactBlocks/YamlBF.Blocks.size() >=
       opts::MatchedProfileThreshold / 100.0)
     return false;
 
+  
+
   bool HasExitBlocks = llvm::any_of(
       Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
   if (!HasExitBlocks)
@@ -757,18 +775,21 @@ bool YAMLProfileReader::inferStaleProfile(
   const BinaryFunction::BasicBlockOrderType BlockOrder(
       BF.getLayout().block_begin(), BF.getLayout().block_end());
 
+  // Create a container for function matching data.
+  FunctionMatchingData FuncMatchingData;
+
   // Create a wrapper flow function to use with the profile inference algorithm.
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
+  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func, FuncMatchingData);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned.
   preprocessUnreachableBlocks(Func);
 
   // Check if profile inference can be applied for the instance.
-  if (!canApplyInference(Func, YamlBF))
+  if (!canApplyInference(Func, YamlBF, FuncMatchingData))
     return false;
 
   // Apply the profile inference algorithm.
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
index 9ccbd0fa88f3d..5be8b3b218034 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -58,9 +58,6 @@ struct FlowFunction {
   std::vector<FlowJump> Jumps;
   /// The index of the entry block.
   uint64_t Entry{0};
-  uint64_t Sink{UINT64_MAX};
-  // Matched execution count for the function.
-  uint64_t MatchedExecCount{0};
 };
 
 /// Various thresholds and options controlling the behavior of the profile

>From 1ae7f029774ab6a904a71eaabcd467629dce688d Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 14 Jun 2024 08:51:44 -0700
Subject: [PATCH 5/5] spr amend

Created using spr 1.3.4
---
 bolt/docs/CommandLineArgumentReference.md | 11 +++--------
 bolt/lib/Profile/StaleProfileMatching.cpp | 13 ++++++-------
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index bdc1d9dfd735c..f63a80d95e7e3 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -614,14 +614,9 @@
 
 - `--lite-threshold-pct=<uint>`
 
-  Threshold (in percent) of matched profile at which stale profile inference is
-  applied to functions. Argument corresponds to the sum of matched execution
-  counts of function blocks divided by the sum of execution counts of function
-  blocks. E.g if the sum of a function blocks' execution counts is 100, the sum
-  of the function blocks' matched execution counts is 10, and the argument is 15
-  (15%), profile inference will not be applied to that function. A higher
-  threshold will correlate with fewer functions to process in cases of stale
-  profile. Default set to %5.
+  Threshold (in percent) of matched profile where profile inference is applied
+  to functions. If the ratio of a function's exactly matched blocks to its total
+  blocks is less than the threshold, inference isn't applied. Default is 0%.
 
 - `--matched-profile-threshold=<uint>`
 
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index cb356afdd2948..4050e10b05b6f 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -429,8 +429,7 @@ void matchWeightsByHashes(BinaryContext &BC,
                           const BinaryFunction::BasicBlockOrderType &BlockOrder,
                           const yaml::bolt::BinaryFunctionProfile &YamlBF,
                           FlowFunction &Func,
-                          FunctionMatchingData &FuncMatchingData
-                          ) {
+                          FunctionMatchingData &FuncMatchingData) {
   assert(Func.Blocks.size() == BlockOrder.size() + 2);
 
   std::vector<FlowBlock *> Blocks;
@@ -612,7 +611,8 @@ void preprocessUnreachableBlocks(FlowFunction &Func) {
 }
 
 /// Decide if stale profile matching can be applied for a given function.
-/// Currently we skip inference for (very) large instances and for instances
+/// Currently we skip inference for (very) large instances, instances where the
+/// number of matched basic blocks is below a set threshold, and for instances
 /// having "unexpected" control flow (e.g., having no sink basic blocks).
 bool canApplyInference(const FlowFunction &Func,
                        const yaml::bolt::BinaryFunctionProfile &YamlBF,
@@ -620,12 +620,10 @@ bool canApplyInference(const FlowFunction &Func,
   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
     return false;
 
-  if ((double)FuncMatchingData.MatchedExactBlocks/YamlBF.Blocks.size() >=
+  if ((double)FuncMatchingData.MatchedExactBlocks / YamlBF.Blocks.size() >=
       opts::MatchedProfileThreshold / 100.0)
     return false;
 
-  
-
   bool HasExitBlocks = llvm::any_of(
       Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
   if (!HasExitBlocks)
@@ -782,7 +780,8 @@ bool YAMLProfileReader::inferStaleProfile(
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func, FuncMatchingData);
+  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func,
+                       FuncMatchingData);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned.