[llvm] [BOLT] Add structure of CDSplit to SplitFunctions (PR #73430)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 27 17:57:49 PST 2023
https://github.com/ShatianWang updated https://github.com/llvm/llvm-project/pull/73430
>From e9e9c2d31b4fe842367af3050a2c6595d5caef74 Mon Sep 17 00:00:00 2001
From: Shatian Wang <shatian at meta.com>
Date: Sat, 25 Nov 2023 19:56:31 -0800
Subject: [PATCH 1/2] [BOLT] Add structure of CDSplit to SplitFunctions
This commit establishes the general structure of the CDSplit
strategy in SplitFunctions without incorporating the exact
splitting logic. With -split-functions -split-strategy=cdsplit,
the SplitFunctions pass will run twice: the first time is before
function reordering and functions are hot-cold split; the second
time is after function reordering and functions are hot-warm-cold
split based on the fixed function ordering. Currently, all
functions are hot-warm split after the entry block in the second
splitting pass. Subsequent commits will introduce the precise
splitting logic.
---
bolt/include/bolt/Core/BinaryContext.h | 3 +
bolt/include/bolt/Passes/SplitFunctions.h | 6 ++
bolt/lib/Passes/ReorderFunctions.cpp | 2 +
bolt/lib/Passes/SplitFunctions.cpp | 86 ++++++++++++++++++++++-
bolt/lib/Rewrite/BinaryPassManager.cpp | 7 ++
5 files changed, 101 insertions(+), 3 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index 312678f475347a4..48a8b2a5d83f2b0 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -611,6 +611,9 @@ class BinaryContext {
/// Indicates if the binary contains split functions.
bool HasSplitFunctions{false};
+ /// Indicates if the function ordering of the binary is finalized.
+ bool HasFinalizedFunctionOrder{false};
+
/// Is the binary always loaded at a fixed address. Shared objects and
/// position-independent executables (PIEs) are examples of binaries that
/// will have HasFixedLoadAddress set to false.
diff --git a/bolt/include/bolt/Passes/SplitFunctions.h b/bolt/include/bolt/Passes/SplitFunctions.h
index 4058f3317dfbdbb..e4a7a827cd7563b 100644
--- a/bolt/include/bolt/Passes/SplitFunctions.h
+++ b/bolt/include/bolt/Passes/SplitFunctions.h
@@ -23,6 +23,9 @@ enum SplitFunctionsStrategy : char {
/// Split each function into a hot and cold fragment using profiling
/// information.
Profile2 = 0,
+ /// Split each function into a hot, warm, and cold fragment using
+ /// profiling information.
+ CDSplit,
/// Split each function into a hot and cold fragment at a randomly chosen
/// split point (ignoring any available profiling information).
Random2,
@@ -41,6 +44,9 @@ class SplitStrategy {
virtual ~SplitStrategy() = default;
virtual bool canSplit(const BinaryFunction &BF) = 0;
virtual bool keepEmpty() = 0;
+ // When autoReversal() == true, check if the new main fragment after splitting
+ // is of a smaller size; if not, revert splitting.
+ virtual bool autoReversal() = 0;
virtual void fragment(const BlockIt Start, const BlockIt End) = 0;
};
diff --git a/bolt/lib/Passes/ReorderFunctions.cpp b/bolt/lib/Passes/ReorderFunctions.cpp
index 92c50f49d20cf7a..7e672969098130d 100644
--- a/bolt/lib/Passes/ReorderFunctions.cpp
+++ b/bolt/lib/Passes/ReorderFunctions.cpp
@@ -427,6 +427,8 @@ void ReorderFunctions::runOnFunctions(BinaryContext &BC) {
reorder(std::move(Clusters), BFs);
+ BC.HasFinalizedFunctionOrder = true;
+
std::unique_ptr<std::ofstream> FuncsFile;
if (!opts::GenerateFunctionOrderFile.empty()) {
FuncsFile = std::make_unique<std::ofstream>(opts::GenerateFunctionOrderFile,
diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp
index 34973cecdf49161..cada2beed490d6b 100644
--- a/bolt/lib/Passes/SplitFunctions.cpp
+++ b/bolt/lib/Passes/SplitFunctions.cpp
@@ -92,6 +92,9 @@ static cl::opt<SplitFunctionsStrategy> SplitStrategy(
cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2",
"split each function into a hot and cold fragment "
"using profiling information")),
+ cl::values(clEnumValN(SplitFunctionsStrategy::CDSplit, "cdsplit",
+ "split each function into a hot, warm, and cold "
+ "fragment using profiling information")),
cl::values(clEnumValN(
SplitFunctionsStrategy::Random2, "random2",
"split each function into a hot and cold fragment at a randomly chosen "
@@ -106,6 +109,11 @@ static cl::opt<SplitFunctionsStrategy> SplitStrategy(
"fragment contains exactly a single basic block")),
cl::desc("strategy used to partition blocks into fragments"),
cl::cat(BoltOptCategory));
+
+bool threeWaySplit() {
+ return opts::SplitFunctions &&
+ opts::SplitStrategy == SplitFunctionsStrategy::CDSplit;
+}
} // namespace opts
namespace {
@@ -126,7 +134,12 @@ struct SplitProfile2 final : public SplitStrategy {
return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
}
- bool keepEmpty() override { return false; }
+ bool keepEmpty() override {
+ return opts::SplitStrategy != SplitFunctionsStrategy::CDSplit ? false
+ : true;
+ }
+
+ bool autoReversal() override { return true; }
void fragment(const BlockIt Start, const BlockIt End) override {
for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) {
@@ -136,6 +149,55 @@ struct SplitProfile2 final : public SplitStrategy {
}
};
+struct SplitCacheDirected final : public SplitStrategy {
+ BinaryContext &BC;
+ using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;
+
+ explicit SplitCacheDirected(BinaryContext &BC) : BC(BC) {}
+
+ bool canSplit(const BinaryFunction &BF) override {
+ return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
+ }
+
+ bool keepEmpty() override { return true; }
+
+ // This strategy does not require that the new hot fragment size strictly
+ // decreases after splitting.
+ bool autoReversal() override { return false; }
+
+ void fragment(const BlockIt Start, const BlockIt End) override {
+ BasicBlockOrder BlockOrder(Start, End);
+ BinaryFunction &BF = *BlockOrder.front()->getFunction();
+
+ size_t BestSplitIndex = findSplitIndex(BF, BlockOrder);
+
+ // Assign fragments based on the computed best split index.
+ // All basic blocks with index up to the best split index become hot.
+ // All remaining blocks are warm / cold depending on if count is
+ // greater than 0 or not.
+ FragmentNum Main(0);
+ FragmentNum Warm(1);
+ FragmentNum Cold(2);
+ for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
+ BinaryBasicBlock *BB = BlockOrder[Index];
+ if (Index <= BestSplitIndex)
+ BB->setFragmentNum(Main);
+ else
+ BB->setFragmentNum(BB->getKnownExecutionCount() > 0 ? Warm : Cold);
+ }
+ }
+
+private:
+ /// Find the best index for splitting. The returned value is the index of the
+ /// last hot basic block. Hence, "no splitting" is equivalent to returning the
+ /// value which is one less than the size of the function.
+ size_t findSplitIndex(const BinaryFunction &BF,
+ const BasicBlockOrder &BlockOrder) {
+ // Placeholder: hot-warm split after entry block.
+ return 0;
+ }
+};
+
struct SplitRandom2 final : public SplitStrategy {
std::minstd_rand0 Gen;
@@ -145,6 +207,8 @@ struct SplitRandom2 final : public SplitStrategy {
bool keepEmpty() override { return false; }
+ bool autoReversal() override { return true; }
+
void fragment(const BlockIt Start, const BlockIt End) override {
using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
const DiffT NumBlocks = End - Start;
@@ -172,6 +236,8 @@ struct SplitRandomN final : public SplitStrategy {
bool keepEmpty() override { return false; }
+ bool autoReversal() override { return true; }
+
void fragment(const BlockIt Start, const BlockIt End) override {
using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
const DiffT NumBlocks = End - Start;
@@ -223,6 +289,8 @@ struct SplitAll final : public SplitStrategy {
return true;
}
+ bool autoReversal() override { return true; }
+
void fragment(const BlockIt Start, const BlockIt End) override {
unsigned Fragment = 0;
for (BinaryBasicBlock *const BB : llvm::make_range(Start, End))
@@ -250,6 +318,16 @@ void SplitFunctions::runOnFunctions(BinaryContext &BC) {
bool ForceSequential = false;
switch (opts::SplitStrategy) {
+ case SplitFunctionsStrategy::CDSplit:
+ // CDSplit runs two splitting passes: hot-cold splitting (SplitPrfoile2)
+ // before function reordering and hot-warm-cold splitting
+ // (SplitCacheDirected) after function reordering.
+ if (BC.HasFinalizedFunctionOrder)
+ Strategy = std::make_unique<SplitCacheDirected>(BC);
+ else
+ Strategy = std::make_unique<SplitProfile2>();
+ opts::AggressiveSplitting = true;
+ break;
case SplitFunctionsStrategy::Profile2:
Strategy = std::make_unique<SplitProfile2>();
break;
@@ -409,8 +487,10 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
<< " post-split is <0x" << Twine::utohexstr(HotSize)
<< ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
- if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
- alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
+ if (S.autoReversal() &&
+ alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
+ alignTo(HotSize, opts::SplitAlignThreshold) +
+ opts::SplitThreshold) {
if (opts::Verbosity >= 2) {
outs() << "BOLT-INFO: Reversing splitting of function "
<< formatv("{0}:\n {1:x}, {2:x} -> {3:x}\n", BF, HotSize,
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 37de3eabc6d235d..da3b686f6a2cfe8 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -52,6 +52,7 @@ extern cl::opt<bool> PrintDynoStats;
extern cl::opt<bool> DumpDotAll;
extern cl::opt<std::string> AsmDump;
extern cl::opt<bolt::PLTCall::OptType> PLT;
+extern bool threeWaySplit();
static cl::opt<bool>
DynoStatsAll("dyno-stats-all",
@@ -430,6 +431,12 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
Manager.registerPass(
std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
+ if (opts::threeWaySplit()) {
+ Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
+ Manager.registerPass(
+ std::make_unique<FixupBranches>(PrintAfterBranchFixup));
+ }
+
// Print final dyno stats right while CFG and instruction analysis are intact.
Manager.registerPass(
std::make_unique<DynoStatsPrintPass>(
>From 165b75fbeb81af70629007838f76f784eec712d5 Mon Sep 17 00:00:00 2001
From: Shatian Wang <shatian at meta.com>
Date: Mon, 27 Nov 2023 17:41:29 -0800
Subject: [PATCH 2/2] fixup! [BOLT] Add structure of CDSplit to SplitFunctions
---
bolt/include/bolt/Passes/SplitFunctions.h | 3 --
bolt/lib/Passes/SplitFunctions.cpp | 55 +++++++++--------------
bolt/lib/Rewrite/BinaryPassManager.cpp | 7 +--
3 files changed, 23 insertions(+), 42 deletions(-)
diff --git a/bolt/include/bolt/Passes/SplitFunctions.h b/bolt/include/bolt/Passes/SplitFunctions.h
index e4a7a827cd7563b..6b5426a774b2558 100644
--- a/bolt/include/bolt/Passes/SplitFunctions.h
+++ b/bolt/include/bolt/Passes/SplitFunctions.h
@@ -44,9 +44,6 @@ class SplitStrategy {
virtual ~SplitStrategy() = default;
virtual bool canSplit(const BinaryFunction &BF) = 0;
virtual bool keepEmpty() = 0;
- // When autoReversal() == true, check if the new main fragment after splitting
- // is of a smaller size; if not, revert splitting.
- virtual bool autoReversal() = 0;
virtual void fragment(const BlockIt Start, const BlockIt End) = 0;
};
diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp
index cada2beed490d6b..23b8d6cf9aaf282 100644
--- a/bolt/lib/Passes/SplitFunctions.cpp
+++ b/bolt/lib/Passes/SplitFunctions.cpp
@@ -109,11 +109,6 @@ static cl::opt<SplitFunctionsStrategy> SplitStrategy(
"fragment contains exactly a single basic block")),
cl::desc("strategy used to partition blocks into fragments"),
cl::cat(BoltOptCategory));
-
-bool threeWaySplit() {
- return opts::SplitFunctions &&
- opts::SplitStrategy == SplitFunctionsStrategy::CDSplit;
-}
} // namespace opts
namespace {
@@ -134,12 +129,7 @@ struct SplitProfile2 final : public SplitStrategy {
return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
}
- bool keepEmpty() override {
- return opts::SplitStrategy != SplitFunctionsStrategy::CDSplit ? false
- : true;
- }
-
- bool autoReversal() override { return true; }
+ bool keepEmpty() override { return false; }
void fragment(const BlockIt Start, const BlockIt End) override {
for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) {
@@ -150,21 +140,17 @@ struct SplitProfile2 final : public SplitStrategy {
};
struct SplitCacheDirected final : public SplitStrategy {
- BinaryContext &BC;
using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;
- explicit SplitCacheDirected(BinaryContext &BC) : BC(BC) {}
-
bool canSplit(const BinaryFunction &BF) override {
return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
}
+ // When some functions are hot-warm split and others are hot-warm-cold split,
+ // we do not want to change the fragment numbers of the blocks in the hot-warm
+ // split functions.
bool keepEmpty() override { return true; }
- // This strategy does not require that the new hot fragment size strictly
- // decreases after splitting.
- bool autoReversal() override { return false; }
-
void fragment(const BlockIt Start, const BlockIt End) override {
BasicBlockOrder BlockOrder(Start, End);
BinaryFunction &BF = *BlockOrder.front()->getFunction();
@@ -176,8 +162,8 @@ struct SplitCacheDirected final : public SplitStrategy {
// All remaining blocks are warm / cold depending on if count is
// greater than 0 or not.
FragmentNum Main(0);
- FragmentNum Warm(1);
- FragmentNum Cold(2);
+ FragmentNum Cold(1);
+ FragmentNum Warm(2);
for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
BinaryBasicBlock *BB = BlockOrder[Index];
if (Index <= BestSplitIndex)
@@ -207,8 +193,6 @@ struct SplitRandom2 final : public SplitStrategy {
bool keepEmpty() override { return false; }
- bool autoReversal() override { return true; }
-
void fragment(const BlockIt Start, const BlockIt End) override {
using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
const DiffT NumBlocks = End - Start;
@@ -236,8 +220,6 @@ struct SplitRandomN final : public SplitStrategy {
bool keepEmpty() override { return false; }
- bool autoReversal() override { return true; }
-
void fragment(const BlockIt Start, const BlockIt End) override {
using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
const DiffT NumBlocks = End - Start;
@@ -289,8 +271,6 @@ struct SplitAll final : public SplitStrategy {
return true;
}
- bool autoReversal() override { return true; }
-
void fragment(const BlockIt Start, const BlockIt End) override {
unsigned Fragment = 0;
for (BinaryBasicBlock *const BB : llvm::make_range(Start, End))
@@ -314,6 +294,12 @@ void SplitFunctions::runOnFunctions(BinaryContext &BC) {
if (!opts::SplitFunctions)
return;
+ // If split strategy is not CDSplit, then a second run of the pass is not
+ // needed after function reordering.
+ if (BC.HasFinalizedFunctionOrder &&
+ opts::SplitStrategy != SplitFunctionsStrategy::CDSplit)
+ return;
+
std::unique_ptr<SplitStrategy> Strategy;
bool ForceSequential = false;
@@ -323,7 +309,7 @@ void SplitFunctions::runOnFunctions(BinaryContext &BC) {
// before function reordering and hot-warm-cold splitting
// (SplitCacheDirected) after function reordering.
if (BC.HasFinalizedFunctionOrder)
- Strategy = std::make_unique<SplitCacheDirected>(BC);
+ Strategy = std::make_unique<SplitCacheDirected>();
else
Strategy = std::make_unique<SplitProfile2>();
opts::AggressiveSplitting = true;
@@ -472,7 +458,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
}
}
- BF.getLayout().update(NewLayout);
+ const bool LayoutUpdated = BF.getLayout().update(NewLayout);
// For shared objects, invoke instructions and corresponding landing pads
// have to be placed in the same fragment. When we split them, create
@@ -482,15 +468,13 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
Trampolines = createEHTrampolines(BF);
// Check the new size to see if it's worth splitting the function.
- if (BC.isX86() && BF.isSplit()) {
+ if (BC.isX86() && LayoutUpdated) {
std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
<< " post-split is <0x" << Twine::utohexstr(HotSize)
<< ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
- if (S.autoReversal() &&
- alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
- alignTo(HotSize, opts::SplitAlignThreshold) +
- opts::SplitThreshold) {
+ if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
+ alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
if (opts::Verbosity >= 2) {
outs() << "BOLT-INFO: Reversing splitting of function "
<< formatv("{0}:\n {1:x}, {2:x} -> {3:x}\n", BF, HotSize,
@@ -511,6 +495,11 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
SplitBytesCold += ColdSize;
}
}
+
+ // Fix branches if the splitting decision of the pass after function
+ // reordering is different from that of the pass before function reordering.
+ if (LayoutUpdated && BC.HasFinalizedFunctionOrder)
+ BF.fixBranches();
}
SplitFunctions::TrampolineSetType
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index da3b686f6a2cfe8..eac6b502e4f24b6 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -52,7 +52,6 @@ extern cl::opt<bool> PrintDynoStats;
extern cl::opt<bool> DumpDotAll;
extern cl::opt<std::string> AsmDump;
extern cl::opt<bolt::PLTCall::OptType> PLT;
-extern bool threeWaySplit();
static cl::opt<bool>
DynoStatsAll("dyno-stats-all",
@@ -431,11 +430,7 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
Manager.registerPass(
std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
- if (opts::threeWaySplit()) {
- Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
- Manager.registerPass(
- std::make_unique<FixupBranches>(PrintAfterBranchFixup));
- }
+ Manager.registerPass(std::make_unique<SplitFunctions>(PrintSplit));
// Print final dyno stats right while CFG and instruction analysis are intact.
Manager.registerPass(
More information about the llvm-commits
mailing list