[llvm] [BOLT] Add structure of CDSplit to SplitFunctions (PR #73430)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 25 22:51:10 PST 2023
https://github.com/ShatianWang created https://github.com/llvm/llvm-project/pull/73430
This commit establishes the general structure of the CDSplit strategy in SplitFunctions without incorporating the exact splitting logic. With -split-functions -split-strategy=cdsplit, the SplitFunctions pass will run twice: the first time is before function reordering and functions are hot-cold split; the second time is after function reordering and functions are hot-warm-cold split based on the fixed function ordering. Currently, all functions are hot-warm split after the entry block in the second splitting pass. Subsequent commits will introduce the precise splitting logic.
>From 4f1d6e9d017180535f24087e488db9eb2f1cdbf2 Mon Sep 17 00:00:00 2001
From: Shatian Wang <shatian at meta.com>
Date: Sat, 25 Nov 2023 19:56:31 -0800
Subject: [PATCH] [BOLT] Add structure of CDSplit to SplitFunctions
This commit establishes the general structure of the CDSplit
strategy in SplitFunctions without incorporating the exact
splitting logic. With -split-functions -split-strategy=cdsplit,
the SplitFunctions pass will run twice: the first time is before
function reordering and functions are hot-cold split; the second
time is after function reordering and functions are hot-warm-cold
split based on the fixed function ordering. Currently, all
functions are hot-warm split after the entry block in the second
splitting pass. Subsequent commits will introduce the precise
splitting logic.
---
bolt/include/bolt/Core/BinaryContext.h | 3 +
bolt/include/bolt/Passes/SplitFunctions.h | 6 ++
bolt/lib/Passes/ReorderFunctions.cpp | 2 +
bolt/lib/Passes/SplitFunctions.cpp | 87 ++++++++++++++++++++++-
bolt/lib/Rewrite/BinaryPassManager.cpp | 4 ++
5 files changed, 99 insertions(+), 3 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 312678f475347a4..fa5238d6463eb0a 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -611,6 +611,9 @@ class BinaryContext {
/// Indicates if the binary contains split functions.
bool HasSplitFunctions{false};
+ /// Indicates if the function ordering in the binary is finalized.
+ bool HasFinalizedFunctionOrder{false};
+
/// Is the binary always loaded at a fixed address. Shared objects and
/// position-independent executables (PIEs) are examples of binaries that
/// will have HasFixedLoadAddress set to false.
diff --git a/bolt/include/bolt/Passes/SplitFunctions.h b/bolt/include/bolt/Passes/SplitFunctions.h
index 4058f3317dfbdbb..e4a7a827cd7563b 100644
--- a/bolt/include/bolt/Passes/SplitFunctions.h
+++ b/bolt/include/bolt/Passes/SplitFunctions.h
@@ -23,6 +23,9 @@ enum SplitFunctionsStrategy : char {
/// Split each function into a hot and cold fragment using profiling
/// information.
Profile2 = 0,
+ /// Split each function into a hot, warm, and cold fragment using
+ /// profiling information.
+ CDSplit,
/// Split each function into a hot and cold fragment at a randomly chosen
/// split point (ignoring any available profiling information).
Random2,
@@ -41,6 +44,9 @@ class SplitStrategy {
virtual ~SplitStrategy() = default;
virtual bool canSplit(const BinaryFunction &BF) = 0;
virtual bool keepEmpty() = 0;
+ // When autoReversal() == true, check if the new main fragment after splitting
+ // is of a smaller size; if not, revert splitting.
+ virtual bool autoReversal() = 0;
virtual void fragment(const BlockIt Start, const BlockIt End) = 0;
};
diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp
index 92c50f49d20cf7a..7e672969098130d 100644
--- a/bolt/lib/Passes/ReorderFunctions.cpp
+++ b/bolt/lib/Passes/ReorderFunctions.cpp
@@ -427,6 +427,8 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
reorder(std::move(Clusters), BFs);
+ BC.HasFinalizedFunctionOrder = true;
+
std::unique_ptr<std::ofstream> FuncsFile;
if (!opts::GenerateFunctionOrderFile.empty()) {
FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp
index 34973cecdf49161..abb45b14d78500a 100644
--- a/bolt/lib/Passes/SplitFunctions.cpp
+++ b/bolt/lib/Passes/SplitFunctions.cpp
@@ -92,6 +92,9 @@ static cl::opt<SplitFunctionsStrategy> SplitStrategy(
cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2",
"split each function into a hot and cold fragment "
"using profiling information")),
+ cl::values(clEnumValN(SplitFunctionsStrategy::CDSplit, "cdsplit",
+ "split each function into a hot, warm, and cold "
+ "fragment using profiling information")),
cl::values(clEnumValN(
SplitFunctionsStrategy::Random2, "random2",
"split each function into a hot and cold fragment at a randomly chosen "
@@ -126,7 +129,12 @@ struct SplitProfile2 final : public SplitStrategy {
return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
}
- bool keepEmpty() override { return false; }
+ bool keepEmpty() override {
+ return opts::SplitStrategy != SplitFunctionsStrategy::CDSplit ? false
+ : true;
+ }
+
+ bool autoReversal() override { return true; }
void fragment(const BlockIt Start, const BlockIt End) override {
for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) {
@@ -136,6 +144,55 @@ struct SplitProfile2 final : public SplitStrategy {
}
};
+struct SplitCacheDirected final : public SplitStrategy {
+ BinaryContext &BC;
+ using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;
+
+ explicit SplitCacheDirected(BinaryContext &BC) : BC(BC) {}
+
+ bool canSplit(const BinaryFunction &BF) override {
+ return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
+ }
+
+ bool keepEmpty() override { return true; }
+
+ // Opt out of the size-based split reversal: this strategy does not require
+ // that the new hot fragment size strictly decreases after splitting.
+ bool autoReversal() override { return false; }
+
+ void fragment(const BlockIt Start, const BlockIt End) override {
+ BasicBlockOrder BlockOrder(Start, End);
+ BinaryFunction &BF = *BlockOrder.front()->getFunction();
+
+ size_t BestSplitIndex = findSplitIndex(BF, BlockOrder);
+
+ // Assign fragments based on the computed best split index.
+ // All basic blocks with index up to and including the best split index
+ // become hot (main fragment). Each remaining block is warm if its known
+ // execution count is greater than 0, and cold otherwise.
+ FragmentNum Main(0);
+ FragmentNum Warm(1);
+ FragmentNum Cold(2);
+ for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
+ BinaryBasicBlock *BB = BlockOrder[Index];
+ if (Index <= BestSplitIndex)
+ BB->setFragmentNum(Main);
+ else
+ BB->setFragmentNum(BB->getKnownExecutionCount() > 0 ? Warm : Cold);
+ }
+ }
+
+private:
+ /// Find the best index for splitting. The returned value is the index of the
+ /// last hot basic block in BlockOrder. Hence, "no splitting" is equivalent to
+ /// returning BlockOrder.size() - 1.
+ size_t findSplitIndex(const BinaryFunction &BF,
+ const BasicBlockOrder &BlockOrder) {
+ // Placeholder: hot-warm split after the entry block (index 0). BF is unused.
+ return 0;
+ }
+};
+
struct SplitRandom2 final : public SplitStrategy {
std::minstd_rand0 Gen;
@@ -145,6 +202,8 @@ struct SplitRandom2 final : public SplitStrategy {
bool keepEmpty() override { return false; }
+ bool autoReversal() override { return true; }
+
void fragment(const BlockIt Start, const BlockIt End) override {
using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
const DiffT NumBlocks = End - Start;
@@ -172,6 +231,8 @@ struct SplitRandomN final : public SplitStrategy {
bool keepEmpty() override { return false; }
+ bool autoReversal() override { return true; }
+
void fragment(const BlockIt Start, const BlockIt End) override {
using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
const DiffT NumBlocks = End - Start;
@@ -223,6 +284,8 @@ struct SplitAll final : public SplitStrategy {
return true;
}
+ bool autoReversal() override { return true; }
+
void fragment(const BlockIt Start, const BlockIt End) override {
unsigned Fragment = 0;
for (BinaryBasicBlock *const BB : llvm::make_range(Start, End))
@@ -246,10 +309,26 @@ void SplitFunctions::runOnFunctions(BinaryContext &BC) {
if (!opts::SplitFunctions)
return;
+ // Run a function splitting pass after the function reordering pass only if
+ // the split strategy is CDSplit.
+ if (BC.HasFinalizedFunctionOrder &&
+ opts::SplitStrategy != SplitFunctionsStrategy::CDSplit)
+ return;
+
std::unique_ptr<SplitStrategy> Strategy;
bool ForceSequential = false;
switch (opts::SplitStrategy) {
+ case SplitFunctionsStrategy::CDSplit:
+ // CDSplit runs two splitting passes: hot-cold splitting (SplitProfile2)
+ // before function reordering and hot-warm-cold splitting
+ // (SplitCacheDirected) after function reordering.
+ if (BC.HasFinalizedFunctionOrder)
+ Strategy = std::make_unique<SplitCacheDirected>(BC);
+ else
+ Strategy = std::make_unique<SplitProfile2>();
+ opts::AggressiveSplitting = true;
+ break;
case SplitFunctionsStrategy::Profile2:
Strategy = std::make_unique<SplitProfile2>();
break;
@@ -409,8 +488,10 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
<< " post-split is <0x" << Twine::utohexstr(HotSize)
<< ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
- if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
- alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
+ if (S.autoReversal() &&
+ alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
+ alignTo(HotSize, opts::SplitAlignThreshold) +
+ opts::SplitThreshold) {
if (opts::Verbosity >= 2) {
outs() << "BOLT-INFO: Reversing splitting of function "
<< formatv("{0}:\n {1:x}, {2:x} -> {3:x}\n", BF, HotSize,
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 37de3eabc6d235d..61e99ba3adbd4a0 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -430,6 +430,10 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
Manager.registerPass(
std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
+ Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
+
+ Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
+
// Print final dyno stats right while CFG and instruction analysis are intact.
Manager.registerPass(
std::make_unique<DynoStatsPrintPass>(
More information about the llvm-commits
mailing list