[llvm] [BOLT] Setup CDSplit Pass Structure (PR #73079)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 21 20:34:06 PST 2023
https://github.com/ShatianWang created https://github.com/llvm/llvm-project/pull/73079
This commit establishes the general structure of the CDSplit
implementation without incorporating the exact splitting logic.
Currently, all functions undergo hot-cold splitting based on the
decisions made by the SplitFunctions pass. Subsequent commits
will introduce the precise splitting logic.
>From ad9c91c2348ba426a830575946cd8748bbb4ef86 Mon Sep 17 00:00:00 2001
From: Shatian Wang <shatian at meta.com>
Date: Thu, 2 Nov 2023 12:26:49 -0700
Subject: [PATCH 1/3] [BOLT] Extend calculateEmittedSize for Block Size
Calculation
This commit modifies BinaryContext::calculateEmittedSize to update the
BinaryBasicBlock::OutputAddressRange of each basic block in the input
BF. The update is done in place, so that BB.OutputAddressRange.second
minus BB.OutputAddressRange.first now gives the emitted size of the basic
block.
---
bolt/include/bolt/Core/BinaryContext.h | 3 +++
bolt/lib/Core/BinaryContext.cpp | 35 +++++++++++++++++++++-----
2 files changed, 32 insertions(+), 6 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index ad1bf2baaeb5b1e..17e55a673e8b489 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -1230,6 +1230,9 @@ class BinaryContext {
///
/// Return the pair where the first size is for the main part, and the second
/// size is for the cold one.
+ /// Modify BinaryBasicBlock::OutputAddressRange for each basic block in the
+ /// function in place so that BB.OutputAddressRange.second minus
+ /// BB.OutputAddressRange.first gives the emitted size of BB.
std::pair<size_t, size_t> calculateEmittedSize(BinaryFunction &BF,
bool FixBranches = true);
diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp
index 06b68765909d20e..baf86333ce53206 100644
--- a/bolt/lib/Core/BinaryContext.cpp
+++ b/bolt/lib/Core/BinaryContext.cpp
@@ -2331,14 +2331,37 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
MCAsmLayout Layout(Assembler);
Assembler.layout(Layout);
+ // Obtain fragment sizes. The vector has to start out empty: the sizes are
+ // appended with push_back() below and are later looked up by fragment
+ // index, so constructing it with fragment_size() elements would put that
+ // many zeros in front of the real sizes. Only reserve the capacity here.
+ std::vector<uint64_t> FragmentSizes;
+ FragmentSizes.reserve(BF.getLayout().fragment_size());
+ // Main fragment size.
const uint64_t HotSize =
Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
- const uint64_t ColdSize =
- std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL,
- [&](const uint64_t Accu, const LabelRange &Labels) {
- return Accu + Layout.getSymbolOffset(*Labels.second) -
- Layout.getSymbolOffset(*Labels.first);
- });
+ FragmentSizes.push_back(HotSize);
+ // Split fragment sizes.
+ uint64_t ColdSize = 0;
+ for (const auto &Labels : SplitLabels) {
+ uint64_t Size = Layout.getSymbolOffset(*Labels.second) -
+ Layout.getSymbolOffset(*Labels.first);
+ FragmentSizes.push_back(Size);
+ ColdSize += Size;
+ }
+
+ // Populate new start and end offsets of each basic block.
+ BinaryBasicBlock *PrevBB = nullptr;
+ uint64_t FragmentIndex = 0;
+ for (FunctionFragment &FF : BF.getLayout().fragments()) {
+ for (BinaryBasicBlock *BB : FF) {
+ const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel()));
+ BB->setOutputStartAddress(BBStartOffset);
+ if (PrevBB)
+ PrevBB->setOutputEndAddress(BBStartOffset);
+ PrevBB = BB;
+ }
+ if (PrevBB)
+ PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
+ FragmentIndex++;
+ PrevBB = nullptr;
+ }
// Clean-up the effect of the code emission.
for (const MCSymbol &Symbol : Assembler.symbols()) {
>From 273ee295a2bbc09a1500c867d1156ea4d29ead40 Mon Sep 17 00:00:00 2001
From: Shatian Wang <shatian at meta.com>
Date: Wed, 8 Nov 2023 08:20:11 -0800
Subject: [PATCH 2/3] [BOLT] Refactor SplitFunctions for Function Reuse
This commit updates SplitFunctions.h and SplitFunctions.cpp to enable
the reuse of createEHTrampolines, mergeEHTrampolines, hasFullProfile,
and allBlocksCold by a distinct function splitting pass (CDSplit).
---
bolt/include/bolt/Core/BinaryFunction.h | 14 ++++++++++
bolt/include/bolt/Passes/SplitFunctions.h | 32 +++++++++++------------
bolt/lib/Passes/SplitFunctions.cpp | 18 +++----------
3 files changed, 33 insertions(+), 31 deletions(-)
diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h
index 72c360ca0c2db66..3723cccc50f040c 100644
--- a/bolt/include/bolt/Core/BinaryFunction.h
+++ b/bolt/include/bolt/Core/BinaryFunction.h
@@ -1272,6 +1272,20 @@ class BinaryFunction {
/// otherwise processed.
bool isPseudo() const { return IsPseudo; }
+ /// Return true if no block in the function is missing profile data, i.e.
+ /// none has an execution count of BinaryBasicBlock::COUNT_NO_PROFILE.
+ bool hasFullProfile() const {
+   const auto LacksProfile = [](const BinaryBasicBlock &BB) {
+     return BB.getExecutionCount() == BinaryBasicBlock::COUNT_NO_PROFILE;
+   };
+   return llvm::none_of(blocks(), LacksProfile);
+ }
+
+ /// Return true if no block in the function has a non-zero execution count.
+ bool allBlocksCold() const {
+   const auto HasNonZeroCount = [](const BinaryBasicBlock &BB) {
+     return BB.getExecutionCount() != 0;
+   };
+   return !llvm::any_of(blocks(), HasNonZeroCount);
+ }
+
/// Return true if the function contains explicit or implicit indirect branch
/// to its split fragments, e.g., split jump table, landing pad in split
/// fragment.
diff --git a/bolt/include/bolt/Passes/SplitFunctions.h b/bolt/include/bolt/Passes/SplitFunctions.h
index 4058f3317dfbdbb..91b6d5518eaab26 100644
--- a/bolt/include/bolt/Passes/SplitFunctions.h
+++ b/bolt/include/bolt/Passes/SplitFunctions.h
@@ -50,6 +50,19 @@ class SplitFunctions : public BinaryFunctionPass {
/// Split function body into fragments.
void splitFunction(BinaryFunction &Function, SplitStrategy &Strategy);
+ std::atomic<uint64_t> SplitBytesHot{0ull};
+ std::atomic<uint64_t> SplitBytesCold{0ull};
+
+public:
+ explicit SplitFunctions(const cl::opt<bool> &PrintPass)
+ : BinaryFunctionPass(PrintPass) {}
+
+ bool shouldOptimize(const BinaryFunction &BF) const override;
+
+ const char *getName() const override { return "split-functions"; }
+
+ void runOnFunctions(BinaryContext &BC) override;
+
struct TrampolineKey {
FragmentNum SourceFN = FragmentNum::main();
const MCSymbol *Target = nullptr;
@@ -81,27 +94,14 @@ class SplitFunctions : public BinaryFunctionPass {
/// corresponding thrower block. The trampoline landing pad, when created,
/// will redirect the execution to the real landing pad in a different
/// fragment.
- TrampolineSetType createEHTrampolines(BinaryFunction &Function) const;
+ static TrampolineSetType createEHTrampolines(BinaryFunction &Function);
/// Merge trampolines into \p Layout without trampolines. The merge will place
/// a trampoline immediately before its destination. Used to revert the effect
/// of trampolines after createEHTrampolines().
- BasicBlockOrderType
+ static BasicBlockOrderType
mergeEHTrampolines(BinaryFunction &BF, BasicBlockOrderType &Layout,
- const TrampolineSetType &Trampolines) const;
-
- std::atomic<uint64_t> SplitBytesHot{0ull};
- std::atomic<uint64_t> SplitBytesCold{0ull};
-
-public:
- explicit SplitFunctions(const cl::opt<bool> &PrintPass)
- : BinaryFunctionPass(PrintPass) {}
-
- bool shouldOptimize(const BinaryFunction &BF) const override;
-
- const char *getName() const override { return "split-functions"; }
-
- void runOnFunctions(BinaryContext &BC) override;
+ const TrampolineSetType &Trampolines);
};
} // namespace bolt
diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp
index 34973cecdf49161..223f8d17367845d 100644
--- a/bolt/lib/Passes/SplitFunctions.cpp
+++ b/bolt/lib/Passes/SplitFunctions.cpp
@@ -109,21 +109,9 @@ static cl::opt<SplitFunctionsStrategy> SplitStrategy(
} // namespace opts
namespace {
-bool hasFullProfile(const BinaryFunction &BF) {
- return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
- return BB.getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE;
- });
-}
-
-bool allBlocksCold(const BinaryFunction &BF) {
- return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
- return BB.getExecutionCount() == 0;
- });
-}
-
struct SplitProfile2 final : public SplitStrategy {
bool canSplit(const BinaryFunction &BF) override {
- return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
+ return BF.hasValidProfile() && BF.hasFullProfile() && !BF.allBlocksCold();
}
bool keepEmpty() override { return false; }
@@ -434,7 +422,7 @@ void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
}
SplitFunctions::TrampolineSetType
-SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
+SplitFunctions::createEHTrampolines(BinaryFunction &BF) {
const auto &MIB = BF.getBinaryContext().MIB;
// Map real landing pads to the corresponding trampolines.
@@ -501,7 +489,7 @@ SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines(
BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout,
- const SplitFunctions::TrampolineSetType &Trampolines) const {
+ const SplitFunctions::TrampolineSetType &Trampolines) {
DenseMap<const MCSymbol *, SmallVector<const MCSymbol *, 0>>
IncomingTrampolines;
for (const auto &Entry : Trampolines) {
>From 2215673c1d18aca198c095f6acbc251f1af0bd97 Mon Sep 17 00:00:00 2001
From: Shatian Wang <shatian at meta.com>
Date: Thu, 2 Nov 2023 20:51:52 -0700
Subject: [PATCH 3/3] [BOLT] Setup CDSplit Pass Structure
This commit establishes the general structure of the CDSplit
implementation without incorporating the exact splitting logic.
Currently, all functions undergo hot-cold splitting based on the
decisions made by the SplitFunctions pass. Subsequent commits
will introduce the precise splitting logic.
---
bolt/include/bolt/Passes/CDSplit.h | 63 ++++++++
bolt/lib/Passes/CDSplit.cpp | 208 +++++++++++++++++++++++++
bolt/lib/Passes/CMakeLists.txt | 1 +
bolt/lib/Passes/SplitFunctions.cpp | 12 ++
bolt/lib/Rewrite/BinaryPassManager.cpp | 10 ++
bolt/lib/Utils/CommandLineOpts.cpp | 6 +
6 files changed, 300 insertions(+)
create mode 100644 bolt/include/bolt/Passes/CDSplit.h
create mode 100644 bolt/lib/Passes/CDSplit.cpp
diff --git a/bolt/include/bolt/Passes/CDSplit.h b/bolt/include/bolt/Passes/CDSplit.h
new file mode 100644
index 000000000000000..96a982683a7ec26
--- /dev/null
+++ b/bolt/include/bolt/Passes/CDSplit.h
@@ -0,0 +1,63 @@
+//===- bolt/Passes/CDSplit.h - Hot/warm/cold function splitting -*- C++ -*-===//
+// The splitting is performed after the function reordering pass.
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BOLT_PASSES_CDSPLIT
+#define BOLT_PASSES_CDSPLIT
+
+#include "bolt/Passes/SplitFunctions.h"
+#include <atomic>
+
+namespace llvm {
+namespace bolt {
+
+using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;
+
+/// Binary function pass that assigns the basic blocks of a function to a
+/// hot, a warm, or a cold fragment.
+class CDSplit : public BinaryFunctionPass {
+public:
+  explicit CDSplit(const cl::opt<bool> &PrintPass)
+      : BinaryFunctionPass(PrintPass) {}
+
+  /// Name of the pass.
+  const char *getName() const override { return "cdsplit"; }
+
+  /// Tell whether \p BF is a candidate for splitting.
+  bool shouldOptimize(const BinaryFunction &BF) const override;
+
+  /// Entry point of the pass.
+  void runOnFunctions(BinaryContext &BC) override;
+
+private:
+  /// Set up the pass-wide state before any function is processed.
+  void initialize(BinaryContext &BC);
+
+  /// Split the body of \p BF into 3 fragments: hot / warm / cold.
+  void runOnFunction(BinaryFunction &BF);
+
+  /// Place every basic block of \p BlockOrder into the hot, the warm, or the
+  /// cold fragment using the CDSplit algorithm.
+  void assignFragmentThreeWay(const BinaryFunction &BF,
+                              const BasicBlockOrder &BlockOrder);
+
+  /// Find the best index separating hot from warm. The basic block located at
+  /// the returned index of \p BlockOrder is the last hot block.
+  size_t findSplitIndex(const BinaryFunction &BF,
+                        const BasicBlockOrder &BlockOrder);
+
+  /// Overall statistics: emitted bytes that went to the main fragment and to
+  /// the remaining fragments of the functions that were split.
+  std::atomic<uint64_t> SplitBytesHot{0ull};
+  std::atomic<uint64_t> SplitBytesCold{0ull};
+
+  /// Functions taken into account by the pass. All of them are used to
+  /// construct a call graph; only a subset is considered for splitting.
+  std::vector<BinaryFunction *> FunctionsToConsider;
+};
+
+} // namespace bolt
+} // namespace llvm
+
+#endif
diff --git a/bolt/lib/Passes/CDSplit.cpp b/bolt/lib/Passes/CDSplit.cpp
new file mode 100644
index 000000000000000..cd67b24241a4249
--- /dev/null
+++ b/bolt/lib/Passes/CDSplit.cpp
@@ -0,0 +1,208 @@
+//===- bolt/Passes/CDSplit.cpp - Pass for splitting function code 3-way ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CDSplit pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "bolt/Passes/CDSplit.h"
+#include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Utils/CommandLineOpts.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/MathExtras.h"
+
+#define DEBUG_TYPE "bolt-opts"
+
+using namespace llvm;
+using namespace bolt;
+
+namespace opts {
+
+extern cl::OptionCategory BoltOptCategory;
+
+extern cl::opt<bool> UseCDSplit;
+extern cl::opt<bool> SplitEH;
+extern cl::opt<unsigned> ExecutionCountThreshold;
+} // namespace opts
+
+namespace llvm {
+namespace bolt {
+
+namespace {
+/// Tell whether \p BF takes part in building the call graph: it needs a
+/// valid index, a valid profile, and a non-empty body.
+bool shouldConsider(const BinaryFunction &BF) {
+  if (!BF.hasValidIndex())
+    return false;
+  if (!BF.hasValidProfile())
+    return false;
+  return !BF.empty();
+}
+} // anonymous namespace
+
+bool CDSplit::shouldOptimize(const BinaryFunction &BF) const {
+  // A function is split only if all of the following hold, checked in order:
+  //  - its execution count reaches opts::ExecutionCountThreshold;
+  //  - every block has a known execution count (no incomplete information);
+  //  - at least one block has a non-zero count, as otherwise there is not
+  //    enough variation in block counts to justify splitting;
+  //  - the generic BinaryFunctionPass criteria accept it.
+  return BF.getKnownExecutionCount() >= opts::ExecutionCountThreshold &&
+         BF.hasFullProfile() && !BF.allBlocksCold() &&
+         BinaryFunctionPass::shouldOptimize(BF);
+}
+
+/// Collect the functions the pass works with into FunctionsToConsider.
+void CDSplit::initialize(BinaryContext &BC) {
+  // Every function accepted by shouldConsider() goes into the list used for
+  // building the call graph. Of those, only the ones that also satisfy
+  // shouldOptimize() are candidates for 3-way splitting.
+  const std::vector<BinaryFunction *> AllFunctions = BC.getSortedFunctions();
+  FunctionsToConsider.reserve(AllFunctions.size());
+  for (BinaryFunction *Candidate : AllFunctions) {
+    if (!shouldConsider(*Candidate))
+      continue;
+    FunctionsToConsider.push_back(Candidate);
+  }
+}
+
+/// Pick the index of the last basic block that stays hot. Returning the
+/// index of the final block (function size minus one) therefore means
+/// "no splitting".
+size_t CDSplit::findSplitIndex(const BinaryFunction &BF,
+                               const BasicBlockOrder &BlockOrder) {
+  // Placeholder: hot-cold splitting. The hot part ends at the last block of
+  // the current main fragment.
+  const auto &MainFragment = BF.getLayout().getMainFragment();
+  return MainFragment.size() - 1;
+}
+
+/// Place every basic block of \p BlockOrder into the hot, the warm, or the
+/// cold fragment using the CDSplit algorithm.
+void CDSplit::assignFragmentThreeWay(const BinaryFunction &BF,
+                                     const BasicBlockOrder &BlockOrder) {
+  const FragmentNum HotFragment(0);
+  const FragmentNum WarmFragment(1);
+  const FragmentNum ColdFragment(2);
+
+  // Blocks up to and including the best split index are hot. Each later
+  // block is warm if its execution count is greater than 0 and cold
+  // otherwise.
+  const size_t LastHotIndex = findSplitIndex(BF, BlockOrder);
+  size_t Position = 0;
+  for (BinaryBasicBlock *Block : BlockOrder) {
+    if (Position++ <= LastHotIndex) {
+      Block->setFragmentNum(HotFragment);
+      continue;
+    }
+    const bool WasExecuted = Block->getKnownExecutionCount() > 0;
+    Block->setFragmentNum(WasExecuted ? WarmFragment : ColdFragment);
+  }
+}
+
+/// Split \p BF: mark the blocks that must stay in the main fragment, assign
+/// the remaining ones to fragments, and commit the resulting block order.
+void CDSplit::runOnFunction(BinaryFunction &BF) {
+  assert(!BF.empty() && "splitting an empty function");
+
+  BinaryContext &BC = BF.getBinaryContext();
+  FunctionLayout &Layout = BF.getLayout();
+  BasicBlockOrder NewLayout(Layout.block_begin(), Layout.block_end());
+
+  // When the function has EH ranges and opts::SplitEH is off, landing pads
+  // and blocks containing an invoke are not moved.
+  const bool KeepEHBlocksInMain = BF.hasEHRanges() && !opts::SplitEH;
+
+  // Tell whether a block that is still outlineable has to be kept from being
+  // outlined.
+  auto MustStayInMain = [&](BinaryBasicBlock &BB) {
+    // Do not split extra entry points in aarch64. They can be referred to by
+    // ADRs, and when this happens the blocks cannot be placed far away due
+    // to the limited range of the ADR instruction.
+    if (BC.isAArch64() && BB.isEntryPoint())
+      return true;
+
+    if (!KeepEHBlocksInMain)
+      return false;
+
+    // We cannot move landing pads (or rather entry points for landing pads).
+    if (BB.isLandingPad())
+      return true;
+
+    // We cannot move a block that can throw, since the exception-handling
+    // runtime cannot deal with split functions. A block that is guaranteed
+    // never to throw is safe to move, which decreases the function size.
+    for (MCInst &Instr : BB)
+      if (BC.MIB->isInvoke(Instr))
+        return true;
+    return false;
+  };
+
+  // Never outline the first basic block.
+  NewLayout.front()->setCanOutline(false);
+  for (BinaryBasicBlock *BB : NewLayout)
+    if (BB->canOutline() && MustStayInMain(*BB))
+      BB->setCanOutline(false);
+
+  // Assign each basic block in NewLayout to the hot, warm, or cold fragment.
+  assignFragmentThreeWay(BF, NewLayout);
+
+  // Non-outlineable blocks go to the main fragment whatever was just decided.
+  for (BinaryBasicBlock *BB : NewLayout)
+    if (!BB->canOutline())
+      BB->setFragmentNum(FragmentNum::main());
+
+  // Blocks that were moved back from warm or cold into main by the loop
+  // above end up at the end of main: the sort is stable and only orders by
+  // fragment number.
+  llvm::stable_sort(NewLayout, [](const BinaryBasicBlock *LHS,
+                                  const BinaryBasicBlock *RHS) {
+    return LHS->getFragmentNum() < RHS->getFragmentNum();
+  });
+
+  Layout.update(NewLayout);
+
+  // For shared objects, invoke instructions and the corresponding landing
+  // pads have to be placed in the same fragment. When they get split, create
+  // trampoline landing pads that redirect the execution to the real LPs.
+  SplitFunctions::TrampolineSetType Trampolines;
+  if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
+    Trampolines = SplitFunctions::createEHTrampolines(BF);
+
+  // Size statistics are only gathered for split functions on x86.
+  if (!BC.isX86() || !BF.isSplit())
+    return;
+
+  const std::pair<size_t, size_t> EmittedSizes = BC.calculateEmittedSize(BF);
+  SplitBytesHot += EmittedSizes.first;
+  SplitBytesCold += EmittedSizes.second;
+}
+
+/// Run the pass over all functions of \p BC and report the byte statistics.
+/// Does nothing unless opts::UseCDSplit is set.
+void CDSplit::runOnFunctions(BinaryContext &BC) {
+  if (!opts::UseCDSplit)
+    return;
+
+  // Initialize global variables.
+  initialize(BC);
+
+  // A function is skipped unless it satisfies both shouldConsider and
+  // shouldOptimize.
+  ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
+    return !shouldConsider(BF) || !shouldOptimize(BF);
+  };
+
+  // Make function splitting decisions in parallel.
+  auto SplitOne = [&](BinaryFunction &BF) { runOnFunction(BF); };
+  ParallelUtilities::runOnEachFunction(
+      BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR, SplitOne,
+      SkipFunc, "CDSplit", /*ForceSequential=*/false);
+
+  // Read the counters once the per-function work has been run.
+  const uint64_t HotBytes = SplitBytesHot;
+  const uint64_t ColdBytes = SplitBytesCold;
+  const uint64_t TotalBytes = HotBytes + ColdBytes;
+  if (TotalBytes == 0) {
+    outs() << "BOLT-INFO: cdsplit didn't split any functions\n";
+    return;
+  }
+
+  outs() << "BOLT-INFO: cdsplit separates " << HotBytes
+         << " hot bytes from " << ColdBytes << " cold bytes "
+         << format("(%.2lf%% of split functions is in the main fragment)\n",
+                   100.0 * HotBytes / TotalBytes);
+}
+
+} // namespace bolt
+} // namespace llvm
diff --git a/bolt/lib/Passes/CMakeLists.txt b/bolt/lib/Passes/CMakeLists.txt
index b8bbe59a64480c9..4cc4b4fa6ae345c 100644
--- a/bolt/lib/Passes/CMakeLists.txt
+++ b/bolt/lib/Passes/CMakeLists.txt
@@ -9,6 +9,7 @@ add_llvm_library(LLVMBOLTPasses
CacheMetrics.cpp
CallGraph.cpp
CallGraphWalker.cpp
+ CDSplit.cpp
DataflowAnalysis.cpp
DataflowInfoManager.cpp
FrameAnalysis.cpp
diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp
index 223f8d17367845d..0c11d0fb49cd09c 100644
--- a/bolt/lib/Passes/SplitFunctions.cpp
+++ b/bolt/lib/Passes/SplitFunctions.cpp
@@ -60,6 +60,7 @@ extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> SplitEH;
extern cl::opt<unsigned> ExecutionCountThreshold;
extern cl::opt<uint32_t> RandomSeed;
+extern cl::opt<bool> UseCDSplit;
static cl::opt<bool> AggressiveSplitting(
"split-all-cold", cl::desc("outline as many cold basic blocks as possible"),
@@ -231,6 +232,17 @@ bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const {
}
void SplitFunctions::runOnFunctions(BinaryContext &BC) {
+ if (opts::UseCDSplit &&
+ !(opts::SplitFunctions &&
+ opts::SplitStrategy == SplitFunctionsStrategy::Profile2)) {
+ errs() << "BOLT-ERROR: -use-cdsplit should be applied together with "
+ "-split-functions using default -split-strategy=profile2. "
+ "-split-functions 2-way splits functions before the function "
+ "reordering pass, while -use-cdsplit 3-way splits functions "
+ "after the function reordering pass. \n";
+ exit(1);
+ }
+
if (!opts::SplitFunctions)
return;
diff --git a/bolt/lib/Rewrite/BinaryPassManager.cpp b/bolt/lib/Rewrite/BinaryPassManager.cpp
index 37de3eabc6d235d..28983de11c3ae07 100644
--- a/bolt/lib/Rewrite/BinaryPassManager.cpp
+++ b/bolt/lib/Rewrite/BinaryPassManager.cpp
@@ -11,6 +11,7 @@
#include "bolt/Passes/Aligner.h"
#include "bolt/Passes/AllocCombiner.h"
#include "bolt/Passes/AsmDump.h"
+#include "bolt/Passes/CDSplit.h"
#include "bolt/Passes/CMOVConversion.h"
#include "bolt/Passes/FixRISCVCallsPass.h"
#include "bolt/Passes/FixRelaxationPass.h"
@@ -182,6 +183,10 @@ static cl::opt<bool>
PrintSplit("print-split", cl::desc("print functions after code splitting"),
cl::Hidden, cl::cat(BoltOptCategory));
+static cl::opt<bool> PrintCDSplit("print-cdsplit",
+ cl::desc("print functions after cdsplit"),
+ cl::Hidden, cl::cat(BoltOptCategory));
+
static cl::opt<bool>
PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
cl::Hidden, cl::cat(BoltOptCategory));
@@ -430,6 +435,11 @@ void BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
Manager.registerPass(
std::make_unique<ReorderFunctions>(PrintReorderedFunctions));
+ /// This pass three-way splits functions after function reordering.
+ Manager.registerPass(std::make_unique<CDSplit>(PrintCDSplit));
+
+ Manager.registerPass(std::make_unique<FixupBranches>(PrintAfterBranchFixup));
+
// Print final dyno stats right while CFG and instruction analysis are intact.
Manager.registerPass(
std::make_unique<DynoStatsPrintPass>(
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index a1df5de26234029..75d63e369c731e4 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -191,6 +191,12 @@ cl::opt<unsigned>
cl::init(0), cl::ZeroOrMore, cl::cat(BoltCategory),
cl::sub(cl::SubCommand::getAll()));
+cl::opt<bool>
+ UseCDSplit("use-cdsplit",
+ cl::desc("split functions into 3 fragments using the CDSplit "
+ "algorithm after function reordering pass"),
+ cl::init(false), cl::cat(BoltOptCategory));
+
bool processAllFunctions() {
if (opts::AggregateOnly)
return false;
More information about the llvm-commits mailing list