[llvm] [BOLT] Add sink block to flow CFG in profile inference (PR #95047)

shaw young via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 13 09:19:04 PDT 2024


https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95047

>From 1b984ba9747618e277c8ffb2c48467aec7c2a6a3 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 10 Jun 2024 12:08:38 -0700
Subject: [PATCH 01/16] [BOLT][NFC] Add sink block to flow CFG in profile
 inference

Test Plan: tbd

Reviewers:
Subscribers:

Tasks:

Tags:

Differential Revision: https://phabricator.intern.facebook.com/D58380996
---
 bolt/lib/Profile/StaleProfileMatching.cpp     | 38 ++++++++++++++++---
 .../Transforms/Utils/SampleProfileInference.h |  3 +-
 2 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 365bc5389266d..8ecfb618072ab 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -309,22 +309,33 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
   FlowFunction Func;
 
   // Add a special "dummy" source so that there is always a unique entry point.
-  // Because of the extra source, for all other blocks in FlowFunction it holds
-  // that Block.Index == BB->getIndex() + 1
   FlowBlock EntryBlock;
   EntryBlock.Index = 0;
   Func.Blocks.push_back(EntryBlock);
 
+  auto BinaryBlockIsExit = [&](const BinaryBasicBlock &BB) {
+    if (BB.successors().empty())
+      return true;
+    return false;
+  };
+
   // Create FlowBlock for every basic block in the binary function
   for (const BinaryBasicBlock *BB : BlockOrder) {
     Func.Blocks.emplace_back();
     FlowBlock &Block = Func.Blocks.back();
     Block.Index = Func.Blocks.size() - 1;
+    Block.HasSuccessors = BinaryBlockIsExit(*BB);
     (void)BB;
     assert(Block.Index == BB->getIndex() + 1 &&
            "incorrectly assigned basic block index");
   }
 
+  // Add a special "dummy" sink block so there is always a unique sink
+  FlowBlock SinkBlock;
+  SinkBlock.Index = Func.Blocks.size();
+  Func.Blocks.push_back(SinkBlock);
+  Func.Sink = SinkBlock.Index;
+
   // Create FlowJump for each jump between basic blocks in the binary function
   std::vector<uint64_t> InDegree(Func.Blocks.size(), 0);
   for (const BinaryBasicBlock *SrcBB : BlockOrder) {
@@ -360,18 +371,29 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
   // Add dummy edges to the extra sources. If there are multiple entry blocks,
   // add an unlikely edge from 0 to the subsequent ones
   assert(InDegree[0] == 0 && "dummy entry blocks shouldn't have predecessors");
-  for (uint64_t I = 1; I < Func.Blocks.size(); I++) {
+  for (uint64_t I = 1; I < BlockOrder.size() + 1; I++) {
     const BinaryBasicBlock *BB = BlockOrder[I - 1];
     if (BB->isEntryPoint() || InDegree[I] == 0) {
       Func.Jumps.emplace_back();
       FlowJump &Jump = Func.Jumps.back();
-      Jump.Source = 0;
+      Jump.Source = Func.Entry;
       Jump.Target = I;
       if (!BB->isEntryPoint())
         Jump.IsUnlikely = true;
     }
   }
 
+  // Add dummy edges from the exit blocks to the sink block.
+  for (uint64_t I = 1; I < BlockOrder.size() + 1; I++) {
+    FlowBlock &Block = Func.Blocks[I];
+    if (Block.HasSuccessors) {
+      Func.Jumps.emplace_back();
+      FlowJump &Jump = Func.Jumps.back();
+      Jump.Source = I;
+      Jump.Target = Func.Sink;
+    }
+  }
+
   // Create necessary metadata for the flow function
   for (FlowJump &Jump : Func.Jumps) {
     assert(Jump.Source < Func.Blocks.size());
@@ -379,6 +401,7 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
     assert(Jump.Target < Func.Blocks.size());
     Func.Blocks[Jump.Target].PredJumps.push_back(&Jump);
   }
+
   return Func;
 }
 
@@ -395,7 +418,7 @@ void matchWeightsByHashes(BinaryContext &BC,
                           const BinaryFunction::BasicBlockOrderType &BlockOrder,
                           const yaml::bolt::BinaryFunctionProfile &YamlBF,
                           FlowFunction &Func) {
-  assert(Func.Blocks.size() == BlockOrder.size() + 1);
+  assert(Func.Blocks.size() == BlockOrder.size() + 2);
 
   std::vector<FlowBlock *> Blocks;
   std::vector<BlendedBlockHash> BlendedHashes;
@@ -618,7 +641,7 @@ void assignProfile(BinaryFunction &BF,
                    FlowFunction &Func) {
   BinaryContext &BC = BF.getBinaryContext();
 
-  assert(Func.Blocks.size() == BlockOrder.size() + 1);
+  assert(Func.Blocks.size() == BlockOrder.size() + 2);
   for (uint64_t I = 0; I < BlockOrder.size(); I++) {
     FlowBlock &Block = Func.Blocks[I + 1];
     BinaryBasicBlock *BB = BlockOrder[I];
@@ -640,6 +663,9 @@ void assignProfile(BinaryFunction &BF,
       if (Jump->Flow == 0)
         continue;
 
+      // Skip the artificial sink block
+      if (Jump->Target == Func.Sink)
+        continue;
       BinaryBasicBlock &SuccBB = *BlockOrder[Jump->Target - 1];
       // Check if the edge corresponds to a regular jump or a landing pad
       if (BB->getSuccessor(SuccBB.getLabel())) {
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
index b4ea1ad840f9d..b2af05a24c705 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -31,10 +31,10 @@ struct FlowBlock {
   uint64_t Flow{0};
   std::vector<FlowJump *> SuccJumps;
   std::vector<FlowJump *> PredJumps;
+  bool HasSuccessors{false};
 
   /// Check if it is the entry block in the function.
   bool isEntry() const { return PredJumps.empty(); }
-
   /// Check if it is an exit block in the function.
   bool isExit() const { return SuccJumps.empty(); }
 };
@@ -57,6 +57,7 @@ struct FlowFunction {
   std::vector<FlowJump> Jumps;
   /// The index of the entry block.
   uint64_t Entry{0};
+  uint64_t Sink{0};
 };
 
 /// Various thresholds and options controlling the behavior of the profile

>From 34922ba752ebe755e89fdf3cca0ef58073e7a9b0 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 10 Jun 2024 16:21:36 -0700
Subject: [PATCH 02/16] Changing design decisions

---
 bolt/lib/Profile/StaleProfileMatching.cpp      | 18 ++++++------------
 .../Transforms/Utils/SampleProfileInference.h  |  5 +++--
 2 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 8ecfb618072ab..58f9f459af84a 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -313,18 +313,13 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
   EntryBlock.Index = 0;
   Func.Blocks.push_back(EntryBlock);
 
-  auto BinaryBlockIsExit = [&](const BinaryBasicBlock &BB) {
-    if (BB.successors().empty())
-      return true;
-    return false;
-  };
-
   // Create FlowBlock for every basic block in the binary function
   for (const BinaryBasicBlock *BB : BlockOrder) {
     Func.Blocks.emplace_back();
     FlowBlock &Block = Func.Blocks.back();
     Block.Index = Func.Blocks.size() - 1;
-    Block.HasSuccessors = BinaryBlockIsExit(*BB);
+    if (BB->successors().empty())
+      Block.IsExit = true;
     (void)BB;
     assert(Block.Index == BB->getIndex() + 1 &&
            "incorrectly assigned basic block index");
@@ -369,14 +364,14 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
   }
 
   // Add dummy edges to the extra sources. If there are multiple entry blocks,
-  // add an unlikely edge from 0 to the subsequent ones
+  // add an unlikely edge from 0 to the subsequent ones. Skips the sink block.
   assert(InDegree[0] == 0 && "dummy entry blocks shouldn't have predecessors");
-  for (uint64_t I = 1; I < BlockOrder.size() + 1; I++) {
+  for (uint64_t I = 1; I < Func.Blocks.size() - 1; I++) {
     const BinaryBasicBlock *BB = BlockOrder[I - 1];
     if (BB->isEntryPoint() || InDegree[I] == 0) {
       Func.Jumps.emplace_back();
       FlowJump &Jump = Func.Jumps.back();
-      Jump.Source = Func.Entry;
+      Jump.Source = 0;
       Jump.Target = I;
       if (!BB->isEntryPoint())
         Jump.IsUnlikely = true;
@@ -386,7 +381,7 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
   // Add dummy edges from the exit blocks to the sink block.
   for (uint64_t I = 1; I < BlockOrder.size() + 1; I++) {
     FlowBlock &Block = Func.Blocks[I];
-    if (Block.HasSuccessors) {
+    if (Block.IsExit) {
       Func.Jumps.emplace_back();
       FlowJump &Jump = Func.Jumps.back();
       Jump.Source = I;
@@ -401,7 +396,6 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
     assert(Jump.Target < Func.Blocks.size());
     Func.Blocks[Jump.Target].PredJumps.push_back(&Jump);
   }
-
   return Func;
 }
 
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
index b2af05a24c705..1f74dc0f7108b 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -28,13 +28,14 @@ struct FlowBlock {
   uint64_t Weight{0};
   bool HasUnknownWeight{true};
   bool IsUnlikely{false};
+  bool IsExit{false};
   uint64_t Flow{0};
   std::vector<FlowJump *> SuccJumps;
   std::vector<FlowJump *> PredJumps;
-  bool HasSuccessors{false};
-
+  
   /// Check if it is the entry block in the function.
   bool isEntry() const { return PredJumps.empty(); }
+
   /// Check if it is an exit block in the function.
   bool isExit() const { return SuccJumps.empty(); }
 };

>From 1712ecb7c9bb9e1c381221a4bd34f44b17590ef2 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 10 Jun 2024 16:31:11 -0700
Subject: [PATCH 03/16] Removing whitespace

---
 llvm/include/llvm/Transforms/Utils/SampleProfileInference.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
index 1f74dc0f7108b..d8962a76a8016 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -32,7 +32,7 @@ struct FlowBlock {
   uint64_t Flow{0};
   std::vector<FlowJump *> SuccJumps;
   std::vector<FlowJump *> PredJumps;
-  
+
   /// Check if it is the entry block in the function.
   bool isEntry() const { return PredJumps.empty(); }
 

>From 78493cbff5ae141cc6b388ca9dcb306597566bbd Mon Sep 17 00:00:00 2001
From: shaw young <58664393+shawbyoung at users.noreply.github.com>
Date: Mon, 10 Jun 2024 17:00:19 -0700
Subject: [PATCH 04/16] Update bolt/lib/Profile/StaleProfileMatching.cpp

Co-authored-by: Amir Ayupov <fads93 at gmail.com>
---
 bolt/lib/Profile/StaleProfileMatching.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 58f9f459af84a..5e3e9e8e725b1 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -318,8 +318,7 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
     Func.Blocks.emplace_back();
     FlowBlock &Block = Func.Blocks.back();
     Block.Index = Func.Blocks.size() - 1;
-    if (BB->successors().empty())
-      Block.IsExit = true;
+    Block.IsExit = BB->successors().empty();
     (void)BB;
     assert(Block.Index == BB->getIndex() + 1 &&
            "incorrectly assigned basic block index");

>From 4f647bf3a12b8fb876d6784f00830e84aa3e7d95 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 11 Jun 2024 00:49:48 -0700
Subject: [PATCH 05/16] Suggested style changes

---
 bolt/lib/Profile/StaleProfileMatching.cpp     | 20 +++++++++----------
 .../Transforms/Utils/SampleProfileInference.h |  2 +-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 58f9f459af84a..5969f4b9bcbc1 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -313,7 +313,7 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
   EntryBlock.Index = 0;
   Func.Blocks.push_back(EntryBlock);
 
-  // Create FlowBlock for every basic block in the binary function
+  // Create FlowBlock for every basic block in the binary function.
   for (const BinaryBasicBlock *BB : BlockOrder) {
     Func.Blocks.emplace_back();
     FlowBlock &Block = Func.Blocks.back();
@@ -325,13 +325,13 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
            "incorrectly assigned basic block index");
   }
 
-  // Add a special "dummy" sink block so there is always a unique sink
+  // Add a special "dummy" sink block so there is always a unique sink.
   FlowBlock SinkBlock;
   SinkBlock.Index = Func.Blocks.size();
   Func.Blocks.push_back(SinkBlock);
   Func.Sink = SinkBlock.Index;
 
-  // Create FlowJump for each jump between basic blocks in the binary function
+  // Create FlowJump for each jump between basic blocks in the binary function.
   std::vector<uint64_t> InDegree(Func.Blocks.size(), 0);
   for (const BinaryBasicBlock *SrcBB : BlockOrder) {
     std::unordered_set<const BinaryBasicBlock *> UniqueSuccs;
@@ -380,13 +380,13 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
 
   // Add dummy edges from the exit blocks to the sink block.
   for (uint64_t I = 1; I < BlockOrder.size() + 1; I++) {
-    FlowBlock &Block = Func.Blocks[I];
-    if (Block.IsExit) {
-      Func.Jumps.emplace_back();
-      FlowJump &Jump = Func.Jumps.back();
-      Jump.Source = I;
-      Jump.Target = Func.Sink;
-    }
+    if (!Func.Blocks[I].IsExit)
+      continue;
+
+    Func.Jumps.emplace_back();
+    FlowJump &Jump = Func.Jumps.back();
+    Jump.Source = I;
+    Jump.Target = Func.Sink;
   }
 
   // Create necessary metadata for the flow function
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
index d8962a76a8016..70a80b91405d0 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -58,7 +58,7 @@ struct FlowFunction {
   std::vector<FlowJump> Jumps;
   /// The index of the entry block.
   uint64_t Entry{0};
-  uint64_t Sink{0};
+  uint64_t Sink{UINT64_MAX};
 };
 
 /// Various thresholds and options controlling the behavior of the profile

>From 4fc35430a0be5afb4ff7909ef6f51859b7019e33 Mon Sep 17 00:00:00 2001
From: shaw young <58664393+shawbyoung at users.noreply.github.com>
Date: Tue, 11 Jun 2024 09:16:20 -0700
Subject: [PATCH 06/16] Commenting

---
 bolt/lib/Profile/StaleProfileMatching.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index cf75b3da0e58d..a35513abbefc4 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -656,7 +656,7 @@ void assignProfile(BinaryFunction &BF,
       if (Jump->Flow == 0)
         continue;
 
-      // Skip the artificial sink block
+      // Skips the artificial sink block.
       if (Jump->Target == Func.Sink)
         continue;
       BinaryBasicBlock &SuccBB = *BlockOrder[Jump->Target - 1];

>From e1c417b092edc87e8236bf1c7f290207700a31bd Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 11 Jun 2024 16:29:41 -0700
Subject: [PATCH 07/16] Bailing out in the case of no exit blocks

---
 bolt/lib/Profile/StaleProfileMatching.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index a35513abbefc4..ee6d1049363b4 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -595,9 +595,7 @@ bool canApplyInference(const FlowFunction &Func) {
   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
     return false;
 
-  bool HasExitBlocks = llvm::any_of(
-      Func.Blocks, [&](const FlowBlock &Block) { return Block.isExit(); });
-  if (!HasExitBlocks)
+  if (Func.Blocks[Func.Sink].isEntry())
     return false;
 
   return true;

>From 39e2344aef6c537cc85e2aa4c99a251e4a36d8c6 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 11 Jun 2024 20:41:04 -0700
Subject: [PATCH 08/16] Commenting

---
 bolt/lib/Profile/StaleProfileMatching.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index ee6d1049363b4..7a8dcb5fa3d64 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -595,6 +595,8 @@ bool canApplyInference(const FlowFunction &Func) {
   if (Func.Blocks.size() > opts::StaleMatchingMaxFuncSize)
     return false;
 
+  // Returns false if the artificial sink block has no predecessors meaning
+  // there are no exit blocks.  
   if (Func.Blocks[Func.Sink].isEntry())
     return false;
 

>From ee2eb50d64e7f425cfb3e287fdc4797caece22fd Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 11 Jun 2024 20:50:42 -0700
Subject: [PATCH 09/16] Formatting

---
 bolt/lib/Profile/StaleProfileMatching.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 7a8dcb5fa3d64..6b1c8a473ea78 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -596,7 +596,7 @@ bool canApplyInference(const FlowFunction &Func) {
     return false;
 
   // Returns false if the artificial sink block has no predecessors meaning
-  // there are no exit blocks.  
+  // there are no exit blocks.
   if (Func.Blocks[Func.Sink].isEntry())
     return false;
 

>From c4e9879f71693a79cc2fa6e27cf86aadbc9d42d5 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Wed, 12 Jun 2024 09:40:26 -0700
Subject: [PATCH 10/16] Testing plan

---
 bolt/test/X86/Inputs/infer_no_exits.cpp | 17 +++++++++++++++++
 bolt/test/X86/infer_no_exits.test       | 11 +++++++++++
 2 files changed, 28 insertions(+)
 create mode 100644 bolt/test/X86/Inputs/infer_no_exits.cpp
 create mode 100644 bolt/test/X86/infer_no_exits.test

diff --git a/bolt/test/X86/Inputs/infer_no_exits.cpp b/bolt/test/X86/Inputs/infer_no_exits.cpp
new file mode 100644
index 0000000000000..0c743d839ca32
--- /dev/null
+++ b/bolt/test/X86/Inputs/infer_no_exits.cpp
@@ -0,0 +1,17 @@
+#include <exception>
+#include <stdexcept>
+void foo(int a) {
+  if (!a)
+    throw std::out_of_range("bad value");
+  return;
+}
+
+int main() {
+  try {
+    foo(1);
+    foo(0);
+  } catch (...) {
+  }
+  std::terminate();
+  return 0;
+}
diff --git a/bolt/test/X86/infer_no_exits.test b/bolt/test/X86/infer_no_exits.test
new file mode 100644
index 0000000000000..105592047911e
--- /dev/null
+++ b/bolt/test/X86/infer_no_exits.test
@@ -0,0 +1,11 @@
+## This verifies that functions where an exit block has a landing pad are covered by stale profile inference.
+# RUN: %clangxx %cxxflags %p/Inputs/infer_no_exits.cpp -o %t.exe
+# RUN: link_fdata %s %t.exe %t.preagg PREAGG
+# RUN: perf2bolt %t.exe -p %t.preagg --pa -o %t.fdata -w %t.yaml
+# RUN: sed -i 's/0000/0001/g' %t.yaml
+# RUN: llvm-bolt %t.exe -data %t.yaml -o %t.null -v=1 -infer-stale-profile 2>&1 \
+# RUN:   | FileCheck %s
+
+# PREAGG: B X:0 #main# 1 0
+
+# CHECK: BOLT-INFO

>From f0d1c6eef276712e14908215c03302c6e71251a5 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Wed, 12 Jun 2024 17:00:09 -0700
Subject: [PATCH 11/16] Tests a exit block with a landing pad

---
 bolt/test/X86/Inputs/infer_no_exits.s | 179 ++++++++++++++++++++++++++
 bolt/test/X86/infer_no_exits.test     |   4 +-
 2 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 bolt/test/X86/Inputs/infer_no_exits.s

diff --git a/bolt/test/X86/Inputs/infer_no_exits.s b/bolt/test/X86/Inputs/infer_no_exits.s
new file mode 100644
index 0000000000000..c3ca349e26651
--- /dev/null
+++ b/bolt/test/X86/Inputs/infer_no_exits.s
@@ -0,0 +1,179 @@
+	.text
+	.file	"infer_no_exits.cpp"
+	.globl	_Z3fooi                         # -- Begin function _Z3fooi
+	.p2align	4, 0x90
+	.type	_Z3fooi, at function
+_Z3fooi:                                # @_Z3fooi
+.Lfunc_begin0:
+	.cfi_startproc
+	.cfi_personality 155, DW.ref.__gxx_personality_v0
+	.cfi_lsda 27, .Lexception0
+# %bb.0:                                # %entry
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register %rbp
+	subq	$32, %rsp
+	movl	%edi, -4(%rbp)
+	cmpl	$0, -4(%rbp)
+	jne	.LBB0_4
+# %bb.1:                                # %if.then
+	movl	$16, %edi
+	callq	__cxa_allocate_exception at PLT
+	movq	%rax, %rdi
+	movq	%rdi, %rax
+	movq	%rax, -32(%rbp)                 # 8-byte Spill
+.Ltmp0:
+	leaq	.L.str(%rip), %rsi
+	callq	_ZNSt12out_of_rangeC1EPKc at PLT
+.Ltmp1:
+	jmp	.LBB0_2
+.LBB0_2:                                # %invoke.cont
+	movq	-32(%rbp), %rdi                 # 8-byte Reload
+	movq	_ZTISt12out_of_range at GOTPCREL(%rip), %rsi
+	movq	_ZNSt12out_of_rangeD1Ev at GOTPCREL(%rip), %rdx
+	callq	__cxa_throw at PLT
+.LBB0_3:                                # %lpad
+.Ltmp2:
+	movq	-32(%rbp), %rdi                 # 8-byte Reload
+	movq	%rax, %rcx
+	movl	%edx, %eax
+	movq	%rcx, -16(%rbp)
+	movl	%eax, -20(%rbp)
+	callq	__cxa_free_exception at PLT
+	jmp	.LBB0_5
+.LBB0_4:                                # %if.end
+	xorl	%eax, %eax
+	addq	$32, %rsp
+	popq	%rbp
+	.cfi_def_cfa %rsp, 8
+	retq
+.LBB0_5:                                # %eh.resume
+	.cfi_def_cfa %rbp, 16
+	movq	-16(%rbp), %rdi
+	callq	_Unwind_Resume at PLT
+.Lfunc_end0:
+	.size	_Z3fooi, .Lfunc_end0-_Z3fooi
+	.cfi_endproc
+	.section	.gcc_except_table,"a", at progbits
+	.p2align	2, 0x0
+GCC_except_table0:
+.Lexception0:
+	.byte	255                             # @LPStart Encoding = omit
+	.byte	255                             # @TType Encoding = omit
+	.byte	1                               # Call site Encoding = uleb128
+	.uleb128 .Lcst_end0-.Lcst_begin0
+.Lcst_begin0:
+	.uleb128 .Lfunc_begin0-.Lfunc_begin0    # >> Call Site 1 <<
+	.uleb128 .Ltmp0-.Lfunc_begin0           #   Call between .Lfunc_begin0 and .Ltmp0
+	.byte	0                               #     has no landing pad
+	.byte	0                               #   On action: cleanup
+	.uleb128 .Ltmp0-.Lfunc_begin0           # >> Call Site 2 <<
+	.uleb128 .Ltmp1-.Ltmp0                  #   Call between .Ltmp0 and .Ltmp1
+	.uleb128 .Ltmp2-.Lfunc_begin0           #     jumps to .Ltmp2
+	.byte	0                               #   On action: cleanup
+	.uleb128 .Ltmp1-.Lfunc_begin0           # >> Call Site 3 <<
+	.uleb128 .Lfunc_end0-.Ltmp1             #   Call between .Ltmp1 and .Lfunc_end0
+	.byte	0                               #     has no landing pad
+	.byte	0                               #   On action: cleanup
+.Lcst_end0:
+	.p2align	2, 0x0
+                                        # -- End function
+	.text
+	.globl	main                            # -- Begin function main
+	.p2align	4, 0x90
+	.type	main, at function
+main:                                   # @main
+.Lfunc_begin1:
+	.cfi_startproc
+	.cfi_personality 155, DW.ref.__gxx_personality_v0
+	.cfi_lsda 27, .Lexception1
+# %bb.0:                                # %entry
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset %rbp, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register %rbp
+	subq	$32, %rsp
+	movl	$0, -4(%rbp)
+	jmp	.Ltmp3
+.LBB1_2:                                # %lpad
+	movq	%rax, %rcx
+	movl	%edx, %eax
+	movq	%rcx, -16(%rbp)
+	movl	%eax, -20(%rbp)
+.Lcatch:
+# %bb.3:                                # %catch
+	movq	-16(%rbp), %rdi
+	callq	__cxa_begin_catch at PLT
+	callq	_ZSt9terminatev at PLT
+.Ltmp3:
+	xorl	%edi, %edi
+	callq	_Z3fooi
+	xorl	%eax, %eax
+	addq	$32, %rsp
+	popq	%rbp
+	.cfi_def_cfa %rsp, 8
+	retq
+.Lgarbage:
+
+.Lfunc_end1:
+	.size	main, .Lfunc_end1-main
+	.cfi_endproc
+	.section	.gcc_except_table,"a", at progbits
+	.p2align	2, 0x0
+GCC_except_table1:
+.Lexception1:
+	.byte	255                             # @LPStart Encoding = omit
+	.byte	155                             # @TType Encoding = indirect pcrel sdata4
+	.uleb128 .Lttbase0-.Lttbaseref0
+.Lttbaseref0:
+	.byte	1                               # Call site Encoding = uleb128
+	.uleb128 .Lcst_end1-.Lcst_begin1
+.Lcst_begin1:
+	.uleb128 .Ltmp3-.Lfunc_begin1           # >> Call Site 1 <<
+	.uleb128 .Lgarbage-.Ltmp3                  #   Call between .Ltmp3 and .Ltmp4
+	.uleb128 .LBB1_2-.Lfunc_begin1           #     jumps to .LBB1_2
+	.byte	1                               #   On action: 1
+	.uleb128 .Lcatch-.Lfunc_begin1           # >> Call Site 2 <<
+	.uleb128 .Lfunc_end1-.Ltmp3             #   Call between .Ltmp4 and .Lfunc_end1
+#	.uleb128 .LBB1_2-.Lfunc_begin1           #     jumps to .LBB1_2
+	.byte	0                               #   On action: cleanup
+	.byte	0                               #   On action: cleanup
+.Lcst_end1:
+	.byte	1                               # >> Action Record 1 <<
+                                        #   Catch TypeInfo 1
+	.byte	0                               #   No further actions
+	.p2align	2, 0x0
+                                        # >> Catch TypeInfos <<
+	.long	0                               # TypeInfo 1
+.Lttbase0:
+	.p2align	2, 0x0
+                                        # -- End function
+	.type	.L.str, at object                  # @.str
+	.section	.rodata.str1.1,"aMS", at progbits,1
+.L.str:
+	.asciz	"bad value"
+	.size	.L.str, 10
+
+	.hidden	DW.ref.__gxx_personality_v0
+	.weak	DW.ref.__gxx_personality_v0
+	.section	.data.DW.ref.__gxx_personality_v0,"awG", at progbits,DW.ref.__gxx_personality_v0,comdat
+	.p2align	3, 0x0
+	.type	DW.ref.__gxx_personality_v0, at object
+	.size	DW.ref.__gxx_personality_v0, 8
+DW.ref.__gxx_personality_v0:
+	.quad	__gxx_personality_v0
+	.ident	"Facebook clang version 19.0.0 (https://git.internal.tfbnw.net/repos/git/rw/osmeta/external/llvm-project 4fc35430a0be5afb4ff7909ef6f51859b7019e33)"
+	.section	".note.GNU-stack","", at progbits
+	.addrsig
+	.addrsig_sym _Z3fooi
+	.addrsig_sym __cxa_allocate_exception
+	.addrsig_sym __gxx_personality_v0
+	.addrsig_sym __cxa_free_exception
+	.addrsig_sym __cxa_throw
+	.addrsig_sym __cxa_begin_catch
+	.addrsig_sym _ZSt9terminatev
+	.addrsig_sym _Unwind_Resume
+	.addrsig_sym _ZTISt12out_of_range
diff --git a/bolt/test/X86/infer_no_exits.test b/bolt/test/X86/infer_no_exits.test
index 105592047911e..2e8027aaf698f 100644
--- a/bolt/test/X86/infer_no_exits.test
+++ b/bolt/test/X86/infer_no_exits.test
@@ -1,8 +1,10 @@
 ## This verifies that functions where an exit block has a landing pad are covered by stale profile inference.
-# RUN: %clangxx %cxxflags %p/Inputs/infer_no_exits.cpp -o %t.exe
+# RUN: %clangxx %cxxflags %p/Inputs/infer_no_exits.s -o %t.exe
 # RUN: link_fdata %s %t.exe %t.preagg PREAGG
 # RUN: perf2bolt %t.exe -p %t.preagg --pa -o %t.fdata -w %t.yaml
 # RUN: sed -i 's/0000/0001/g' %t.yaml
+# RUN: sed -i 's/hash:            0x9AE3FDBFFBE36274/hash:            0x0000000000000000/g' %t.yaml
+
 # RUN: llvm-bolt %t.exe -data %t.yaml -o %t.null -v=1 -infer-stale-profile 2>&1 \
 # RUN:   | FileCheck %s
 

>From 54309ed47a219918a7cc836989a5f9f8e2765682 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 13 Jun 2024 08:19:29 -0700
Subject: [PATCH 12/16] Updating test

---
 bolt/test/X86/Inputs/infer_no_exits.s | 4 ----
 bolt/test/X86/infer_no_exits.test     | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/bolt/test/X86/Inputs/infer_no_exits.s b/bolt/test/X86/Inputs/infer_no_exits.s
index c3ca349e26651..0a65b6568f61f 100644
--- a/bolt/test/X86/Inputs/infer_no_exits.s
+++ b/bolt/test/X86/Inputs/infer_no_exits.s
@@ -162,10 +162,6 @@ GCC_except_table1:
 	.section	.data.DW.ref.__gxx_personality_v0,"awG", at progbits,DW.ref.__gxx_personality_v0,comdat
 	.p2align	3, 0x0
 	.type	DW.ref.__gxx_personality_v0, at object
-	.size	DW.ref.__gxx_personality_v0, 8
-DW.ref.__gxx_personality_v0:
-	.quad	__gxx_personality_v0
-	.ident	"Facebook clang version 19.0.0 (https://git.internal.tfbnw.net/repos/git/rw/osmeta/external/llvm-project 4fc35430a0be5afb4ff7909ef6f51859b7019e33)"
 	.section	".note.GNU-stack","", at progbits
 	.addrsig
 	.addrsig_sym _Z3fooi
diff --git a/bolt/test/X86/infer_no_exits.test b/bolt/test/X86/infer_no_exits.test
index 2e8027aaf698f..7009cdcb3dfe9 100644
--- a/bolt/test/X86/infer_no_exits.test
+++ b/bolt/test/X86/infer_no_exits.test
@@ -3,7 +3,7 @@
 # RUN: link_fdata %s %t.exe %t.preagg PREAGG
 # RUN: perf2bolt %t.exe -p %t.preagg --pa -o %t.fdata -w %t.yaml
 # RUN: sed -i 's/0000/0001/g' %t.yaml
-# RUN: sed -i 's/hash:            0x9AE3FDBFFBE36274/hash:            0x0000000000000000/g' %t.yaml
+# RUN: sed -i 's/0x[0-9A-Fa-f]{16}/0x0000000000000000/g' %t.yaml
 
 # RUN: llvm-bolt %t.exe -data %t.yaml -o %t.null -v=1 -infer-stale-profile 2>&1 \
 # RUN:   | FileCheck %s

>From 07ee171307deeabe52912ecb103387184c9c9089 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 13 Jun 2024 08:59:17 -0700
Subject: [PATCH 13/16] Testing, changing hash function in YAML

---
 bolt/test/X86/infer_no_exits.test | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/bolt/test/X86/infer_no_exits.test b/bolt/test/X86/infer_no_exits.test
index 7009cdcb3dfe9..22fd38fa154c9 100644
--- a/bolt/test/X86/infer_no_exits.test
+++ b/bolt/test/X86/infer_no_exits.test
@@ -3,8 +3,7 @@
 # RUN: link_fdata %s %t.exe %t.preagg PREAGG
 # RUN: perf2bolt %t.exe -p %t.preagg --pa -o %t.fdata -w %t.yaml
 # RUN: sed -i 's/0000/0001/g' %t.yaml
-# RUN: sed -i 's/0x[0-9A-Fa-f]{16}/0x0000000000000000/g' %t.yaml
-
+# RUN: sed -i '0,/hash:/s/0x[0-9A-Fa-f]\{16\}/0x0000000000000000/' %t.yaml
 # RUN: llvm-bolt %t.exe -data %t.yaml -o %t.null -v=1 -infer-stale-profile 2>&1 \
 # RUN:   | FileCheck %s
 

>From 525a224dfb79de39980210fc45a62df6102f2b50 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 13 Jun 2024 09:05:54 -0700
Subject: [PATCH 14/16] Removed extraneous test command

---
 bolt/test/X86/infer_no_exits.test | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bolt/test/X86/infer_no_exits.test b/bolt/test/X86/infer_no_exits.test
index 22fd38fa154c9..043e2ebef0d13 100644
--- a/bolt/test/X86/infer_no_exits.test
+++ b/bolt/test/X86/infer_no_exits.test
@@ -1,5 +1,4 @@
 ## This verifies that functions where an exit block has a landing pad are covered by stale profile inference.
-# RUN: %clangxx %cxxflags %p/Inputs/infer_no_exits.s -o %t.exe
 # RUN: link_fdata %s %t.exe %t.preagg PREAGG
 # RUN: perf2bolt %t.exe -p %t.preagg --pa -o %t.fdata -w %t.yaml
 # RUN: sed -i 's/0000/0001/g' %t.yaml

>From a3b0508451a7fbd3da34250bc1b692632ebba66c Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 13 Jun 2024 09:16:50 -0700
Subject: [PATCH 15/16] Removed IsExit field from FlowBlock

---
 bolt/lib/Profile/StaleProfileMatching.cpp             | 11 +++++------
 .../llvm/Transforms/Utils/SampleProfileInference.h    |  1 -
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 6b1c8a473ea78..22c9910190f21 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -307,7 +307,7 @@ void BinaryFunction::computeBlockHashes(HashFunction HashFunction) const {
 FlowFunction
 createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
   FlowFunction Func;
-
+  std::vector<uint64_t> ExitBlocksIndices;
   // Add a special "dummy" source so that there is always a unique entry point.
   FlowBlock EntryBlock;
   EntryBlock.Index = 0;
@@ -318,7 +318,9 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
     Func.Blocks.emplace_back();
     FlowBlock &Block = Func.Blocks.back();
     Block.Index = Func.Blocks.size() - 1;
-    Block.IsExit = BB->successors().empty();
+    if ( BB->successors().empty() ) {
+      ExitBlocksIndices.push_back(Block.Index);  
+    }
     (void)BB;
     assert(Block.Index == BB->getIndex() + 1 &&
            "incorrectly assigned basic block index");
@@ -378,10 +380,7 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
   }
 
   // Add dummy edges from the exit blocks to the sink block.
-  for (uint64_t I = 1; I < BlockOrder.size() + 1; I++) {
-    if (!Func.Blocks[I].IsExit)
-      continue;
-
+  for (uint64_t I : ExitBlocksIndices) {
     Func.Jumps.emplace_back();
     FlowJump &Jump = Func.Jumps.back();
     Jump.Source = I;
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
index 70a80b91405d0..0716977bafc9c 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileInference.h
@@ -28,7 +28,6 @@ struct FlowBlock {
   uint64_t Weight{0};
   bool HasUnknownWeight{true};
   bool IsUnlikely{false};
-  bool IsExit{false};
   uint64_t Flow{0};
   std::vector<FlowJump *> SuccJumps;
   std::vector<FlowJump *> PredJumps;

>From a35348357828a0d096ffbce8b0f584f8b0af4899 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 13 Jun 2024 09:18:47 -0700
Subject: [PATCH 16/16] Formatting

---
 bolt/lib/Profile/StaleProfileMatching.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/StaleProfileMatching.cpp b/bolt/lib/Profile/StaleProfileMatching.cpp
index 22c9910190f21..d49e2d8410586 100644
--- a/bolt/lib/Profile/StaleProfileMatching.cpp
+++ b/bolt/lib/Profile/StaleProfileMatching.cpp
@@ -318,8 +318,8 @@ createFlowFunction(const BinaryFunction::BasicBlockOrderType &BlockOrder) {
     Func.Blocks.emplace_back();
     FlowBlock &Block = Func.Blocks.back();
     Block.Index = Func.Blocks.size() - 1;
-    if ( BB->successors().empty() ) {
-      ExitBlocksIndices.push_back(Block.Index);  
+    if (BB->successors().empty()) {
+      ExitBlocksIndices.push_back(Block.Index);
     }
     (void)BB;
     assert(Block.Index == BB->getIndex() + 1 &&



More information about the llvm-commits mailing list