[llvm-branch-commits] [llvm] [BOLT] Drop high discrepancy profiles in matching (PR #95156)

shaw young via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon Jun 17 09:55:10 PDT 2024


https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95156

>From aa441dc0163d3d0f63de1e4dd1fa359180f82f1f Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 11 Jun 2024 11:43:13 -0700
Subject: [PATCH 01/15] Summary: Functions with little exact matching

Created using spr 1.3.4
---
 bolt/docs/CommandLineArgumentReference.md | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index 8887d1f5d5bd4..bdc1d9dfd735c 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -614,6 +614,17 @@
 
 - `--lite-threshold-pct=<uint>`
 
+  Threshold (in percent) of matched profile at which stale profile inference is
+  applied to functions. Argument corresponds to the sum of matched execution
+  counts of function blocks divided by the sum of execution counts of function
+  blocks. E.g if the sum of a function blocks' execution counts is 100, the sum
+  of the function blocks' matched execution counts is 10, and the argument is 15
+  (15%), profile inference will not be applied to that function. A higher
+  threshold will correlate with fewer functions to process in cases of stale
+  profile. Default set to %5.
+
+- `--matched-profile-threshold=<uint>`
+
   Threshold (in percent) for selecting functions to process in lite mode. Higher
   threshold means fewer functions to process. E.g threshold of 90 means only top
   10 percent of functions with profile will be processed.
@@ -1161,4 +1172,4 @@
 
 - `--print-options`
 
-  Print non-default options after command line parsing
\ No newline at end of file
+  Print non-default options after command line parsing

>From 46fa37a054a129ca36e7b6ae126273e40fddea98 Mon Sep 17 00:00:00 2001
From: shaw young <58664393+shawbyoung at users.noreply.github.com>
Date: Tue, 11 Jun 2024 14:32:40 -0700
Subject: [PATCH 02/15] Update SampleProfileInference.h

---
 llvm/include/llvm/Transforms/Utils/SampleProfileInference.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
index c654715c0ae9f..9ccbd0fa88f3d 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -58,6 +58,7 @@ struct FlowFunction {
   std::vector<FlowJump> Jumps;
   /// The index of the entry block.
   uint64_t Entry{0};
+  uint64_t Sink{UINT64_MAX};
   // Matched execution count for the function.
   uint64_t MatchedExecCount{0};
 };

>From d532514257feb5e86232e76c437c99a41d5f2cea Mon Sep 17 00:00:00 2001
From: shaw young <58664393+shawbyoung at users.noreply.github.com>
Date: Tue, 11 Jun 2024 14:39:28 -0700
Subject: [PATCH 03/15] Update StaleProfileMatching.cpp

---
 bolt/lib/Profile/StaleProfileMatching.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 41afa6b4bbb19..47335163263a4 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -604,8 +604,8 @@ bool canApplyInference(const FlowFunction &Func,
   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
     return false;
 
-  if (Func.MatchedExecCount / YamlBF.ExecCount >=
-      opts::MatchedProfileThreshold / 100)
+  if ((double)Func.MatchedExecCount / YamlBF.ExecCount >=
+      opts::MatchedProfileThreshold / 100.0)
     return false;
 
   bool HasExitBlocks = llvm::any_of(

>From 3fc6d72d866333d8ce964fdfaa748791d4f8d2b4 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 14 Jun 2024 08:38:19 -0700
Subject: [PATCH 04/15] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp     | 37 +++++++++++++++----
 .../Transforms/Utils/SampleProfileInference.h |  3 --
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 47335163263a4..cb356afdd2948 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -53,9 +53,9 @@ cl::opt<bool>
 
 cl::opt<unsigned> MatchedProfileThreshold(
     "matched-profile-threshold",
-    cl::desc("Percentage threshold of matched execution counts at which stale "
+    cl::desc("Percentage threshold of matched basic blocks at which stale "
              "profile inference is executed."),
-    cl::init(5), cl::Hidden, cl::cat(BoltOptCategory));
+    cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
 
 cl::opt<unsigned> StaleMatchingMaxFuncSize(
     "stale-matching-max-func-size",
@@ -186,6 +186,17 @@ struct BlendedBlockHash {
   uint8_t SuccHash{0};
 };
 
+/// A data object containing function matching information.
+struct FunctionMatchingData {
+public:
+  /// The number of blocks matched exactly.
+  uint64_t MatchedExactBlocks{0};
+  /// The number of blocks matched loosely.
+  uint64_t MatchedLooseBlocks{0};
+  /// The number of execution counts matched.
+  uint64_t MatchedExecCounts{0};
+};
+
 /// The object is used to identify and match basic blocks in a BinaryFunction
 /// given their hashes computed on a binary built from several revisions behind
 /// release.
@@ -417,7 +428,9 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 void matchWeightsByHashes(BinaryContext &BC,
                           const BinaryFunction::BasicBlockOrderType &BlockOrder,
                           const yaml::bolt::BinaryFunctionProfile &YamlBF,
-                          FlowFunction &Func) {
+                          FlowFunction &Func,
+                          FunctionMatchingData &FuncMatchingData
+                          ) {
   assert(Func.Blocks.size() == BlockOrder.size() + 2);
 
   std::vector<FlowBlock *> Blocks;
@@ -457,9 +470,11 @@ void matchWeightsByHashes(BinaryContext &BC,
       if (Matcher.isHighConfidenceMatch(BinHash, YamlHash)) {
         ++BC.Stats.NumMatchedBlocks;
         BC.Stats.MatchedSampleCount += YamlBB.ExecCount;
-        Func.MatchedExecCount += YamlBB.ExecCount;
+        FuncMatchingData.MatchedExecCounts += YamlBB.ExecCount;
+        FuncMatchingData.MatchedExactBlocks += 1;
         LLVM_DEBUG(dbgs() << "  exact match\n");
       } else {
+        FuncMatchingData.MatchedLooseBlocks += 1;
         LLVM_DEBUG(dbgs() << "  loose match\n");
       }
       if (YamlBB.NumInstructions == BB->size())
@@ -600,14 +615,17 @@ void preprocessUnreachableBlocks(FlowFunction &Func) {
 /// Currently we skip inference for (very) large instances and for instances
 /// having "unexpected" control flow (e.g., having no sink basic blocks).
 bool canApplyInference(const FlowFunction &Func,
-                       const yaml::bolt::BinaryFunctionProfile &YamlBF) {
+                       const yaml::bolt::BinaryFunctionProfile &YamlBF,
+                       const FunctionMatchingData &FuncMatchingData) {
   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
     return false;
 
-  if ((double)Func.MatchedExecCount / YamlBF.ExecCount >=
+  if ((double)FuncMatchingData.MatchedExactBlocks/YamlBF.Blocks.size() >=
       opts::MatchedProfileThreshold / 100.0)
     return false;
 
+  
+
   bool HasExitBlocks = llvm::any_of(
       Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
   if (!HasExitBlocks)
@@ -757,18 +775,21 @@ bool YAMLProfileReader::inferStaleProfile(
   const BinaryFunction::BasicBlockOrderType BlockOrder(
       BF.getLayout().block_begin(), BF.getLayout().block_end());
 
+  // Create a containter for function matching data.
+  FunctionMatchingData FuncMatchingData;
+
   // Create a wrapper flow function to use with the profile inference algorithm.
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
+  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func, FuncMatchingData);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned.
   preprocessUnreachableBlocks(Func);
 
   // Check if profile inference can be applied for the instance.
-  if (!canApplyInference(Func, YamlBF))
+  if (!canApplyInference(Func, YamlBF, FuncMatchingData))
     return false;
 
   // Apply the profile inference algorithm.
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
index 9ccbd0fa88f3d..5be8b3b218034 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -58,9 +58,6 @@ struct FlowFunction {
   std::vector<FlowJump> Jumps;
   /// The index of the entry block.
   uint64_t Entry{0};
-  uint64_t Sink{UINT64_MAX};
-  // Matched execution count for the function.
-  uint64_t MatchedExecCount{0};
 };
 
 /// Various thresholds and options controlling the behavior of the profile

>From 1ae7f029774ab6a904a71eaabcd467629dce688d Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 14 Jun 2024 08:51:44 -0700
Subject: [PATCH 05/15] spr amend

Created using spr 1.3.4
---
 bolt/docs/CommandLineArgumentReference.md | 11 +++--------
 bolt/lib/Profile/StaleProfileMatching.cpp | 13 ++++++-------
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index bdc1d9dfd735c..f63a80d95e7e3 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -614,14 +614,9 @@
 
 - `--lite-threshold-pct=<uint>`
 
-  Threshold (in percent) of matched profile at which stale profile inference is
-  applied to functions. Argument corresponds to the sum of matched execution
-  counts of function blocks divided by the sum of execution counts of function
-  blocks. E.g if the sum of a function blocks' execution counts is 100, the sum
-  of the function blocks' matched execution counts is 10, and the argument is 15
-  (15%), profile inference will not be applied to that function. A higher
-  threshold will correlate with fewer functions to process in cases of stale
-  profile. Default set to %5.
+  Threshold (in percent) of matched profile where profile inference is applied
+  to functions. If the ratio of a function's exactly matched blocks to number of
+  blocks is less than the threshold, inference isn't applied. Default is 0%.
 
 - `--matched-profile-threshold=<uint>`
 
diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index cb356afdd2948..4050e10b05b6f 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -429,8 +429,7 @@ void matchWeightsByHashes(BinaryContext &BC,
                           const BinaryFunction::BasicBlockOrderType &BlockOrder,
                           const yaml::bolt::BinaryFunctionProfile &YamlBF,
                           FlowFunction &Func,
-                          FunctionMatchingData &FuncMatchingData
-                          ) {
+                          FunctionMatchingData &FuncMatchingData) {
   assert(Func.Blocks.size() == BlockOrder.size() + 2);
 
   std::vector<FlowBlock *> Blocks;
@@ -612,7 +611,8 @@ void preprocessUnreachableBlocks(FlowFunction &Func) {
 }
 
 /// Decide if stale profile matching can be applied for a given function.
-/// Currently we skip inference for (very) large instances and for instances
+/// Currently we skip inference for (very) large instances, instances where the
+/// number of matched basic blocks is below a set threshold, and for instances
 /// having "unexpected" control flow (e.g., having no sink basic blocks).
 bool canApplyInference(const FlowFunction &Func,
                        const yaml::bolt::BinaryFunctionProfile &YamlBF,
@@ -620,12 +620,10 @@ bool canApplyInference(const FlowFunction &Func,
   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
     return false;
 
-  if ((double)FuncMatchingData.MatchedExactBlocks/YamlBF.Blocks.size() >=
+  if ((double)FuncMatchingData.MatchedExactBlocks / YamlBF.Blocks.size() >=
       opts::MatchedProfileThreshold / 100.0)
     return false;
 
-  
-
   bool HasExitBlocks = llvm::any_of(
       Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
   if (!HasExitBlocks)
@@ -782,7 +780,8 @@ bool YAMLProfileReader::inferStaleProfile(
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func, FuncMatchingData);
+  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func,
+                       FuncMatchingData);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned.

>From 544b2eb7eb8b8df7a7d1ffec4715e4c553b9d874 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 14 Jun 2024 12:03:59 -0700
Subject: [PATCH 06/15] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index cabff3067113f..650cd5708ab78 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -55,7 +55,7 @@ cl::opt<unsigned> StaleMatchingMinMatchedBlock(
     "stale-matching-min-matched-block",
     cl::desc("Percentage threshold of matched basic blocks at which stale "
              "profile inference is executed."),
-    cl::init(50), cl::Hidden, cl::cat(BoltOptCategory));
+    cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
 
 cl::opt<unsigned> StaleMatchingMaxFuncSize(
     "stale-matching-max-func-size",

>From 72feffe09450c30e630b2bbcaf30d1a9b3c45bc6 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 14 Jun 2024 13:36:35 -0700
Subject: [PATCH 07/15] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 25 +++++++++--------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 650cd5708ab78..13e53cbd59701 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -186,13 +186,6 @@ struct BlendedBlockHash {
   uint8_t SuccHash{0};
 };
 
-/// A data object containing function matching information.
-struct FunctionMatchingData {
-public:
-  /// The number of blocks matched exactly and loosely.
-  uint64_t MatchedBlocks{0};
-};
-
 /// The object is used to identify and match basic blocks in a BinaryFunction
 /// given their hashes computed on a binary built from several revisions behind
 /// release.
@@ -407,7 +400,8 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 void matchWeightsByHashes(BinaryContext &BC,
                           const BinaryFunction::BasicBlockOrderType &BlockOrder,
                           const yaml::bolt::BinaryFunctionProfile &YamlBF,
-                          FlowFunction &Func) {
+                          FlowFunction &Func,
+                          uint64_t &MatchedBlocksCount) {
   assert(Func.Blocks.size() == BlockOrder.size() + 1);
 
   std::vector<FlowBlock *> Blocks;
@@ -453,7 +447,6 @@ void matchWeightsByHashes(BinaryContext &BC,
       }
       if (YamlBB.NumInstructions == BB->size())
         ++BC.Stats.NumStaleBlocksWithEqualIcount;
-      FuncMatchingData.MatchedBlocks += 1;
     } else {
       LLVM_DEBUG(
           dbgs() << "Couldn't match yaml block (bid = " << YamlBB.Index << ")"
@@ -465,6 +458,8 @@ void matchWeightsByHashes(BinaryContext &BC,
     BC.Stats.StaleSampleCount += YamlBB.ExecCount;
   }
 
+  MatchedBlocksCount = MatchedBlocks.size();
+
   // Match jumps from the profile to the jumps from CFG
   std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
   std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);
@@ -591,11 +586,11 @@ void preprocessUnreachableBlocks(FlowFunction &Func) {
 /// having "unexpected" control flow (e.g., having no sink basic blocks).
 bool canApplyInference(const FlowFunction &Func,
                        const yaml::bolt::BinaryFunctionProfile &YamlBF,
-                       const FunctionMatchingData &FuncMatchingData) {
+                       const uint64_t &MatchedBlocks) {
   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
     return false;
 
-  if ((double)(FuncMatchingData.MatchedBlocks) / YamlBF.Blocks.size() <=
+  if ((double)(MatchedBlocks) / YamlBF.Blocks.size() <=
       opts::StaleMatchingMinMatchedBlock / 100.0)
     return false;
 
@@ -745,22 +740,22 @@ bool YAMLProfileReader::inferStaleProfile(
   const BinaryFunction::BasicBlockOrderType BlockOrder(
       BF.getLayout().block_begin(), BF.getLayout().block_end());
 
-  // Create a containter for function matching data.
-  FunctionMatchingData FuncMatchingData;
+  // Tracks the number of matched blocks.
+  uint64_t MatchedBlocks;
 
   // Create a wrapper flow function to use with the profile inference algorithm.
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
   matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func,
-                       FuncMatchingData);
+                       MatchedBlocks);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned.
   preprocessUnreachableBlocks(Func);
 
   // Check if profile inference can be applied for the instance.
-  if (!canApplyInference(Func, YamlBF, FuncMatchingData))
+  if (!canApplyInference(Func, YamlBF, MatchedBlocks))
     return false;
 
   // Apply the profile inference algorithm.

>From a4c4c397d5de392af0ecebb88f1e301ff2658123 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 14 Jun 2024 14:11:45 -0700
Subject: [PATCH 08/15] spr amend

Created using spr 1.3.4
---
 ...arge_profile_stale_low_matched_blocks.yaml | 57 +++++++++++++++++++
 .../X86/stale-matching-min-matched-block.test |  8 +++
 2 files changed, 65 insertions(+)
 create mode 100644 bolt/test/X86/Inputs/blarge_profile_stale_low_matched_blocks.yaml
 create mode 100644 bolt/test/X86/stale-matching-min-matched-block.test

diff --git a/bolt/test/X86/Inputs/blarge_profile_stale_low_matched_blocks.yaml b/bolt/test/X86/Inputs/blarge_profile_stale_low_matched_blocks.yaml
new file mode 100644
index 0000000000000..785e23922ce49
--- /dev/null
+++ b/bolt/test/X86/Inputs/blarge_profile_stale_low_matched_blocks.yaml
@@ -0,0 +1,57 @@
+---
+header:
+  profile-version: 1
+  binary-name:     'reader-yaml.test.tmp.exe'
+  binary-build-id: '<unknown>'
+  profile-flags:   [ lbr ]
+  profile-origin:  branch profile reader
+  profile-events:  ''
+  dfs-order:       false
+  hash-func:       xxh3
+functions:
+  - name:            SolveCubic
+    fid:             6
+    hash:            0x0000000000000000
+    exec:            151
+    nblocks:         18
+    blocks:
+      - bid:             0
+        insns:           43
+        hash:            0x4600940a609c0000
+        exec:            151
+        succ:            [ { bid: 1, cnt: 151, mis: 2 }, { bid: 7, cnt: 0 } ]
+      - bid:             1
+        insns:           7
+        hash:            0x167a1f084f130088
+        succ:            [ { bid: 13, cnt: 151 }, { bid: 2, cnt: 0 } ]
+      - bid:             13
+        insns:           26
+        hash:            0xa8d50000f81902a7
+        succ:            [ { bid: 3, cnt: 89 }, { bid: 2, cnt: 10 } ]
+      - bid:             3
+        insns:           9
+        hash:            0xc516000073dc00a0
+        succ:            [ { bid: 5, cnt: 151 } ]
+      - bid:             5
+        insns:           9
+        hash:            0x6446e1ea500111
+  - name:            usqrt
+    fid:             7
+    hash:            0x0000000000000000
+    exec:            20
+    nblocks:         6
+    blocks:
+      - bid:             0
+        insns:           4
+        hash:            0x0000000000000001
+        exec:            20
+        succ:            [ { bid: 1, cnt: 0 } ]
+      - bid:             1
+        insns:           9
+        hash:            0x0000000000000001
+        succ:            [ { bid: 3, cnt: 320, mis: 171 }, { bid: 2, cnt: 0 } ]
+      - bid:             3
+        insns:           2
+        hash:            0x0000000000000001
+        succ:            [ { bid: 1, cnt: 300, mis: 33 }, { bid: 4, cnt: 20 } ]
+...
diff --git a/bolt/test/X86/stale-matching-min-matched-block.test b/bolt/test/X86/stale-matching-min-matched-block.test
new file mode 100644
index 0000000000000..d07467ee82b9e
--- /dev/null
+++ b/bolt/test/X86/stale-matching-min-matched-block.test
@@ -0,0 +1,8 @@
+## This script checks the stale-matching-min-matched-block flag.
+
+## Testing "usqrt"
+RUN: llvm-bolt %t.exe -o %t.null --b %p/Inputs/blarge_profile_stale_low_matched_blocks.yaml \
+RUN:   --print-cfg --print-only=usqrt --infer-stale-profile=1 --stale-matching-min-matched-block=75 \
+RUN:   --profile-ignore-hash=1 --profile-use-dfs=0 --debug-only=bolt-prof 2>&1 | FileCheck %s
+
+CHECK:    BOLT-INFO: inferred profile for 1 (50.00% of profiled, 50.00% of stale) functions responsible for 46.31% samples (552 out of 1192)

>From 77a48e7ca4b1000cee0aa80d8a2e7bb4a5ed218e Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 14 Jun 2024 14:26:42 -0700
Subject: [PATCH 09/15] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 13e53cbd59701..0131faccea07d 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -400,8 +400,7 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 void matchWeightsByHashes(BinaryContext &BC,
                           const BinaryFunction::BasicBlockOrderType &BlockOrder,
                           const yaml::bolt::BinaryFunctionProfile &YamlBF,
-                          FlowFunction &Func,
-                          uint64_t &MatchedBlocksCount) {
+                          FlowFunction &Func, uint64_t &MatchedBlocksCount) {
   assert(Func.Blocks.size() == BlockOrder.size() + 1);
 
   std::vector<FlowBlock *> Blocks;

>From a5e2c0e5da5914bbf6b73ea5f776dbddbaf48945 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 14 Jun 2024 18:04:50 -0700
Subject: [PATCH 10/15] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/stale-matching-min-matched-block.test | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/bolt/test/X86/stale-matching-min-matched-block.test b/bolt/test/X86/stale-matching-min-matched-block.test
index d07467ee82b9e..06bcb7061717d 100644
--- a/bolt/test/X86/stale-matching-min-matched-block.test
+++ b/bolt/test/X86/stale-matching-min-matched-block.test
@@ -1,8 +1,11 @@
 ## This script checks the stale-matching-min-matched-block flag.
 
+REQUIRES: asserts
+RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
+
 ## Testing "usqrt"
 RUN: llvm-bolt %t.exe -o %t.null --b %p/Inputs/blarge_profile_stale_low_matched_blocks.yaml \
-RUN:   --print-cfg --print-only=usqrt --infer-stale-profile=1 --stale-matching-min-matched-block=75 \
-RUN:   --profile-ignore-hash=1 --profile-use-dfs=0 --debug-only=bolt-prof 2>&1 | FileCheck %s
+RUN:   --infer-stale-profile=1 --stale-matching-min-matched-block=75 \
+RUN:   --profile-ignore-hash=1 --debug-only=bolt-prof 2>&1 | FileCheck %s
 
 CHECK:    BOLT-INFO: inferred profile for 1 (50.00% of profiled, 50.00% of stale) functions responsible for 46.31% samples (552 out of 1192)

>From c5361281aac78f5337a7ce354079942134b4bc49 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 14 Jun 2024 18:10:54 -0700
Subject: [PATCH 11/15] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/stale-matching-min-matched-block.test | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bolt/test/X86/stale-matching-min-matched-block.test b/bolt/test/X86/stale-matching-min-matched-block.test
index 06bcb7061717d..383d4d7dcb9d2 100644
--- a/bolt/test/X86/stale-matching-min-matched-block.test
+++ b/bolt/test/X86/stale-matching-min-matched-block.test
@@ -1,6 +1,5 @@
 ## This script checks the stale-matching-min-matched-block flag.
 
-REQUIRES: asserts
 RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
 
 ## Testing "usqrt"

>From 2398e963495690ba2c82c706337d85e37b040d4a Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Sat, 15 Jun 2024 00:12:56 -0700
Subject: [PATCH 12/15] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 0131faccea07d..909315fd92657 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -740,7 +740,7 @@ bool YAMLProfileReader::inferStaleProfile(
       BF.getLayout().block_begin(), BF.getLayout().block_end());
 
   // Tracks the number of matched blocks.
-  uint64_t MatchedBlocks;
+  uint64_t MatchedBlocks{0};
 
   // Create a wrapper flow function to use with the profile inference algorithm.
   FlowFunction Func = createFlowFunction(BlockOrder);

>From cb0efe0bd336fdf4fee40a9d9101b161d38e1311 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 17 Jun 2024 08:52:41 -0700
Subject: [PATCH 13/15] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 909315fd92657..1e0531a43d50f 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -397,10 +397,9 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 /// of the basic blocks in the binary, the count is "matched" to the block.
 /// Similarly, if both the source and the target of a count in the profile are
 /// matched to a jump in the binary, the count is recorded in CFG.
-void matchWeightsByHashes(BinaryContext &BC,
-                          const BinaryFunction::BasicBlockOrderType &BlockOrder,
-                          const yaml::bolt::BinaryFunctionProfile &YamlBF,
-                          FlowFunction &Func, uint64_t &MatchedBlocksCount) {
+uint64_t matchWeightsByHashes(
+    BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
+    const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func) {
   assert(Func.Blocks.size() == BlockOrder.size() + 1);
 
   std::vector<FlowBlock *> Blocks;
@@ -457,8 +456,6 @@ void matchWeightsByHashes(BinaryContext &BC,
     BC.Stats.StaleSampleCount += YamlBB.ExecCount;
   }
 
-  MatchedBlocksCount = MatchedBlocks.size();
-
   // Match jumps from the profile to the jumps from CFG
   std::vector<uint64_t> OutWeight(Func.Blocks.size(), 0);
   std::vector<uint64_t> InWeight(Func.Blocks.size(), 0);
@@ -508,6 +505,8 @@ void matchWeightsByHashes(BinaryContext &BC,
     Block.HasUnknownWeight = false;
     Block.Weight = std::max(OutWeight[Block.Index], InWeight[Block.Index]);
   }
+
+  return MatchedBlocks.size();
 }
 
 /// The function finds all blocks that are (i) reachable from the Entry block
@@ -589,8 +588,8 @@ bool canApplyInference(const FlowFunction &Func,
   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
     return false;
 
-  if ((double)(MatchedBlocks) / YamlBF.Blocks.size() <=
-      opts::StaleMatchingMinMatchedBlock / 100.0)
+  if (MatchedBlocks * 100 <
+      opts::StaleMatchingMinMatchedBlock * YamlBF.Blocks.size())
     return false;
 
   bool HasExitBlocks = llvm::any_of(
@@ -740,14 +739,13 @@ bool YAMLProfileReader::inferStaleProfile(
       BF.getLayout().block_begin(), BF.getLayout().block_end());
 
   // Tracks the number of matched blocks.
-  uint64_t MatchedBlocks{0};
 
   // Create a wrapper flow function to use with the profile inference algorithm.
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func,
-                       MatchedBlocks);
+  uint64_t MatchedBlocks =
+      matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that
   // they don't get any counts assigned.

>From 8550bc3e9ac37432386b3d11909fd050ee7487ad Mon Sep 17 00:00:00 2001
From: shaw young <58664393+shawbyoung at users.noreply.github.com>
Date: Mon, 17 Jun 2024 09:53:57 -0700
Subject: [PATCH 14/15] Update bolt/lib/Profile/StaleProfileMatching.cpp

Co-authored-by: Amir Ayupov <aaupov at fb.com>
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 1e0531a43d50f..6b868d30db93d 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -397,7 +397,7 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 /// of the basic blocks in the binary, the count is "matched" to the block.
 /// Similarly, if both the source and the target of a count in the profile are
 /// matched to a jump in the binary, the count is recorded in CFG.
-uint64_t matchWeightsByHashes(
+size_t matchWeightsByHashes(
     BinaryContext &BC, const BinaryFunction::BasicBlockOrderType &BlockOrder,
     const yaml::bolt::BinaryFunctionProfile &YamlBF, FlowFunction &Func) {
   assert(Func.Blocks.size() == BlockOrder.size() + 1);

>From de9760437b98f922c0ae1d94791513a3faa307f6 Mon Sep 17 00:00:00 2001
From: shaw young <58664393+shawbyoung at users.noreply.github.com>
Date: Mon, 17 Jun 2024 09:54:56 -0700
Subject: [PATCH 15/15] Update StaleProfileMatching.cpp

---
 bolt/lib/Profile/StaleProfileMatching.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 6b868d30db93d..c9bcd04281e72 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -744,7 +744,7 @@ bool YAMLProfileReader::inferStaleProfile(
   FlowFunction Func = createFlowFunction(BlockOrder);
 
   // Match as many block/jump counts from the stale profile as possible
-  uint64_t MatchedBlocks =
+  size_t MatchedBlocks =
       matchWeightsByHashes(BF.getBinaryContext(), BlockOrder, YamlBF, Func);
 
   // Adjust the flow function by marking unreachable blocks Unlikely so that



More information about the llvm-branch-commits mailing list